• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <string.h>
12 
13 #include "third_party/googletest/src/include/gtest/gtest.h"
14 
15 #include "./vp9_rtcd.h"
16 #include "./vpx_config.h"
17 #include "./vpx_dsp_rtcd.h"
18 #include "test/acm_random.h"
19 #include "test/clear_system_state.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
22 #include "vp9/common/vp9_common.h"
23 #include "vp9/common/vp9_filter.h"
24 #include "vpx_dsp/vpx_dsp_common.h"
25 #include "vpx_dsp/vpx_filter.h"
26 #include "vpx_mem/vpx_mem.h"
27 #include "vpx_ports/mem.h"
28 #include "vpx_ports/vpx_timer.h"
29 
30 namespace {
31 
32 static const unsigned int kMaxDimension = 64;
33 
// Signature shared by every convolve function under test: filter the w-by-h
// block at |src| into |dst|.  |filter_x|/|filter_y| point at 8-tap kernels;
// the *_stride arguments give the step between successive kernels for the
// scaled variants (0 when the filter argument is unused).
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);

// Signature of the C reference wrappers (wrapper_filter_block2d_8_c and
// wrapper_filter_average_block2d_8_c) that produce the expected output.
// |use_highbd| is 0 for the 8-bit path; otherwise it is the bit depth and
// the pixel pointers actually refer to uint16_t data.
typedef void (*WrapperFilterBlock2d8Func)(
    const uint8_t *src_ptr, const unsigned int src_stride,
    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
    unsigned int dst_stride, unsigned int output_width,
    unsigned int output_height, int use_highbd);
45 
46 struct ConvolveFunctions {
ConvolveFunctions__anond4a532260111::ConvolveFunctions47   ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
48                     ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
49                     ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8,
50                     ConvolveFunc sh8_avg, ConvolveFunc sv8,
51                     ConvolveFunc sv8_avg, ConvolveFunc shv8,
52                     ConvolveFunc shv8_avg, int bd)
53       : use_highbd_(bd) {
54     copy_[0] = copy;
55     copy_[1] = avg;
56     h8_[0] = h8;
57     h8_[1] = h8_avg;
58     v8_[0] = v8;
59     v8_[1] = v8_avg;
60     hv8_[0] = hv8;
61     hv8_[1] = hv8_avg;
62     sh8_[0] = sh8;
63     sh8_[1] = sh8_avg;
64     sv8_[0] = sv8;
65     sv8_[1] = sv8_avg;
66     shv8_[0] = shv8;
67     shv8_[1] = shv8_avg;
68   }
69 
70   ConvolveFunc copy_[2];
71   ConvolveFunc h8_[2];
72   ConvolveFunc v8_[2];
73   ConvolveFunc hv8_[2];
74   ConvolveFunc sh8_[2];   // scaled horiz
75   ConvolveFunc sv8_[2];   // scaled vert
76   ConvolveFunc shv8_[2];  // scaled horiz/vert
77   int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
78 };
79 
// Test parameter tuple: <block width, block height, function set under test>.
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;

// Expands to one ConvolveParam for every supported block size, 4x4 .. 64x64.
#define ALL_SIZES(convolve_fn)                                            \
  make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn),         \
      make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn),     \
      make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn),   \
      make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \
      make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \
      make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \
      make_tuple(64, 64, &convolve_fn)
90 
// Reference 8-tap subpixel filter, slightly modified to fit into this test.
#define VP9_FILTER_WEIGHT 128
#define VP9_FILTER_SHIFT 7

// Saturate |x| to the valid range of an 8-bit pixel, [0, 255].
uint8_t clip_pixel(int x) {
  if (x < 0) return 0;
  if (x > 255) return 255;
  return static_cast<uint8_t>(x);
}
95 
// Reference 2-D 8-tap filter: a horizontal pass writes into a transposed
// intermediate buffer, then a vertical pass writes the final pixels into
// |dst_ptr|.  Each output is rounded and clipped to [0, 255].
void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
                        const int16_t *hfilter, const int16_t *vfilter,
                        uint8_t *dst_ptr, unsigned int dst_stride,
                        unsigned int output_width, unsigned int output_height) {
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;
  unsigned int i, j;

  // Size of intermediate_buffer is max_intermediate_height * filter_max_width,
  // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
  //                                 + kInterp_Extend
  //                               = 3 + 16 + 4
  //                               = 23
  // and filter_max_width          = 16
  //
  uint8_t intermediate_buffer[71 * kMaxDimension];
  // The intermediate buffer is filled column-major (transposed); after a
  // full input row, this negative stride steps back to the top of the next
  // column.
  const int intermediate_next_stride =
      1 - static_cast<int>(intermediate_height * output_width);

  // Horizontal pass (src -> transposed intermediate).
  uint8_t *output_ptr = intermediate_buffer;
  const int src_next_row_stride = src_stride - output_width;
  // Back up (kInterp_Extend - 1) rows and columns so the 8 taps are centered
  // on the block's first pixel.
  src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
  for (i = 0; i < intermediate_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
      const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
                       (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
                       (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
                       (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                       (VP9_FILTER_WEIGHT >> 1);  // Rounding

      // Normalize back to 0-255...
      *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT);
      ++src_ptr;
      output_ptr += intermediate_height;
    }
    src_ptr += src_next_row_stride;
    output_ptr += intermediate_next_stride;
  }

  // Vertical pass (transposed intermediate -> dst).
  src_ptr = intermediate_buffer;
  const int dst_next_row_stride = dst_stride - output_width;
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
      const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
                       (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
                       (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
                       (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                       (VP9_FILTER_WEIGHT >> 1);  // Rounding

      // Normalize back to 0-255...
      *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT);
      src_ptr += intermediate_height;
    }
    src_ptr += intermediate_next_stride;
    dst_ptr += dst_next_row_stride;
  }
}
162 
// In-place rounded average: output_ptr[r][c] = (output_ptr[r][c] +
// src[r][c] + 1) >> 1 over an output_width x output_height block.
void block2d_average_c(uint8_t *src, unsigned int src_stride,
                       uint8_t *output_ptr, unsigned int output_stride,
                       unsigned int output_width, unsigned int output_height) {
  for (unsigned int row = 0; row < output_height; ++row) {
    uint8_t *const out_row = output_ptr + row * output_stride;
    const uint8_t *const src_row = src + row * src_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      out_row[col] = (out_row[col] + src_row[col] + 1) >> 1;
    }
  }
}
174 
filter_average_block2d_8_c(const uint8_t * src_ptr,const unsigned int src_stride,const int16_t * hfilter,const int16_t * vfilter,uint8_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height)175 void filter_average_block2d_8_c(const uint8_t *src_ptr,
176                                 const unsigned int src_stride,
177                                 const int16_t *hfilter, const int16_t *vfilter,
178                                 uint8_t *dst_ptr, unsigned int dst_stride,
179                                 unsigned int output_width,
180                                 unsigned int output_height) {
181   uint8_t tmp[kMaxDimension * kMaxDimension];
182 
183   assert(output_width <= kMaxDimension);
184   assert(output_height <= kMaxDimension);
185   filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
186                      output_width, output_height);
187   block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, output_height);
188 }
189 
190 #if CONFIG_VP9_HIGHBITDEPTH
// High-bitdepth counterpart of filter_block2d_8_c: horizontal pass into a
// transposed intermediate buffer, then vertical pass into |dst_ptr|, with
// results clipped to |bd| bits via clip_pixel_highbd().
void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
                               const unsigned int src_stride,
                               const int16_t *hfilter, const int16_t *vfilter,
                               uint16_t *dst_ptr, unsigned int dst_stride,
                               unsigned int output_width,
                               unsigned int output_height, int bd) {
  // Between passes, we use an intermediate buffer whose height is extended to
  // have enough horizontally filtered values as input for the vertical pass.
  // This buffer is allocated to be big enough for the largest block type we
  // support.
  const int kInterp_Extend = 4;
  const unsigned int intermediate_height =
      (kInterp_Extend - 1) + output_height + kInterp_Extend;

  /* Size of intermediate_buffer is max_intermediate_height * filter_max_width,
   * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height
   *                                 + kInterp_Extend
   *                               = 3 + 16 + 4
   *                               = 23
   * and filter_max_width = 16
   */
  uint16_t intermediate_buffer[71 * kMaxDimension];
  // Negative stride that rewinds to the top of the next transposed column
  // after each full input row.
  const int intermediate_next_stride =
      1 - static_cast<int>(intermediate_height * output_width);

  // Horizontal pass (src -> transposed intermediate).
  {
    uint16_t *output_ptr = intermediate_buffer;
    const int src_next_row_stride = src_stride - output_width;
    unsigned int i, j;
    // Back up (kInterp_Extend - 1) rows/cols so the taps cover the block's
    // first pixel.
    src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1);
    for (i = 0; i < intermediate_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
        const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
                         (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
                         (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
                         (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to the bd-bit pixel range...
        *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
        ++src_ptr;
        output_ptr += intermediate_height;
      }
      src_ptr += src_next_row_stride;
      output_ptr += intermediate_next_stride;
    }
  }

  // Vertical pass (transposed intermediate -> dst).
  {
    uint16_t *src_ptr = intermediate_buffer;  // shadows the parameter on purpose
    const int dst_next_row_stride = dst_stride - output_width;
    unsigned int i, j;
    for (i = 0; i < output_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
        const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
                         (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
                         (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
                         (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to the bd-bit pixel range...
        *dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd);
        src_ptr += intermediate_height;
      }
      src_ptr += intermediate_next_stride;
      dst_ptr += dst_next_row_stride;
    }
  }
}
264 
// In-place rounded average for high-bitdepth blocks:
// output_ptr[r][c] = (output_ptr[r][c] + src[r][c] + 1) >> 1.
void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,
                              uint16_t *output_ptr, unsigned int output_stride,
                              unsigned int output_width,
                              unsigned int output_height) {
  for (unsigned int row = 0; row < output_height; ++row) {
    uint16_t *const out_row = output_ptr + row * output_stride;
    const uint16_t *const src_row = src + row * src_stride;
    for (unsigned int col = 0; col < output_width; ++col) {
      out_row[col] = (out_row[col] + src_row[col] + 1) >> 1;
    }
  }
}
277 
highbd_filter_average_block2d_8_c(const uint16_t * src_ptr,const unsigned int src_stride,const int16_t * hfilter,const int16_t * vfilter,uint16_t * dst_ptr,unsigned int dst_stride,unsigned int output_width,unsigned int output_height,int bd)278 void highbd_filter_average_block2d_8_c(
279     const uint16_t *src_ptr, const unsigned int src_stride,
280     const int16_t *hfilter, const int16_t *vfilter, uint16_t *dst_ptr,
281     unsigned int dst_stride, unsigned int output_width,
282     unsigned int output_height, int bd) {
283   uint16_t tmp[kMaxDimension * kMaxDimension];
284 
285   assert(output_width <= kMaxDimension);
286   assert(output_height <= kMaxDimension);
287   highbd_filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
288                             output_width, output_height, bd);
289   highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width,
290                            output_height);
291 }
292 #endif  // CONFIG_VP9_HIGHBITDEPTH
293 
// Bit-depth dispatch for the averaging reference filter.  |use_highbd| == 0
// selects the 8-bit path; otherwise it is the bit depth and the uint8_t
// pointers actually carry uint16_t data (converted via CAST_TO_SHORTPTR).
void wrapper_filter_average_block2d_8_c(
    const uint8_t *src_ptr, const unsigned int src_stride,
    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
    unsigned int dst_stride, unsigned int output_width,
    unsigned int output_height, int use_highbd) {
#if CONFIG_VP9_HIGHBITDEPTH
  if (use_highbd == 0) {
    filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                               dst_stride, output_width, output_height);
  } else {
    highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride,
                                      hfilter, vfilter,
                                      CAST_TO_SHORTPTR(dst_ptr), dst_stride,
                                      output_width, output_height, use_highbd);
  }
#else
  // Without high-bitdepth support only the 8-bit path is legal.
  ASSERT_EQ(0, use_highbd);
  filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                             dst_stride, output_width, output_height);
#endif
}
315 
// Bit-depth dispatch for the plain reference filter.  |use_highbd| == 0
// selects the 8-bit path; otherwise it is the bit depth and the uint8_t
// pointers actually carry uint16_t data (converted via CAST_TO_SHORTPTR).
void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
                                const unsigned int src_stride,
                                const int16_t *hfilter, const int16_t *vfilter,
                                uint8_t *dst_ptr, unsigned int dst_stride,
                                unsigned int output_width,
                                unsigned int output_height, int use_highbd) {
#if CONFIG_VP9_HIGHBITDEPTH
  if (use_highbd == 0) {
    filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
                       dst_stride, output_width, output_height);
  } else {
    highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter,
                              vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride,
                              output_width, output_height, use_highbd);
  }
#else
  // Without high-bitdepth support only the 8-bit path is legal.
  ASSERT_EQ(0, use_highbd);
  filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride,
                     output_width, output_height);
#endif
}
337 
// Parameterized fixture.  Each test operates on a Width() x Height() inner
// block centered in a kOuterBlockSize x kOuterBlockSize buffer; the area
// outside the inner block is filled with 255 guard bytes so stray writes
// are caught by CheckGuardBlocks().
class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 public:
  // Allocates the static buffers shared by all tests in this case.
  static void SetUpTestCase() {
    // Force input_ to be unaligned, output to be 16 byte aligned.
    input_ = reinterpret_cast<uint8_t *>(
                 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) +
             1;
    output_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
    output_ref_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
#if CONFIG_VP9_HIGHBITDEPTH
    input16_ = reinterpret_cast<uint16_t *>(vpx_memalign(
                   kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) +
               1;
    output16_ = reinterpret_cast<uint16_t *>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
    output16_ref_ = reinterpret_cast<uint16_t *>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
#endif
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

  // Frees the static buffers.  The "- 1" undoes the deliberate
  // misalignment applied to input_/input16_ in SetUpTestCase().
  static void TearDownTestCase() {
    vpx_free(input_ - 1);
    input_ = NULL;
    vpx_free(output_);
    output_ = NULL;
    vpx_free(output_ref_);
    output_ref_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_free(input16_ - 1);
    input16_ = NULL;
    vpx_free(output16_);
    output16_ = NULL;
    vpx_free(output16_ref_);
    output16_ref_ = NULL;
#endif
  }

 protected:
  static const int kDataAlignment = 16;
  static const int kOuterBlockSize = 256;
  static const int kInputStride = kOuterBlockSize;
  static const int kOutputStride = kOuterBlockSize;
  static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize;
  static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize;

  // Block dimensions come from the test parameter tuple.
  int Width() const { return GET_PARAM(0); }
  int Height() const { return GET_PARAM(1); }
  // Left edge of the inner block, rounded up to kDataAlignment.
  int BorderLeft() const {
    const int center = (kOuterBlockSize - Width()) / 2;
    return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1);
  }
  int BorderTop() const { return (kOuterBlockSize - Height()) / 2; }

  // True when flat index |i| lies outside the inner block (i.e. in the
  // guard area).
  bool IsIndexInBorder(int i) {
    return (i < BorderTop() * kOuterBlockSize ||
            i >= (BorderTop() + Height()) * kOuterBlockSize ||
            i % kOuterBlockSize < BorderLeft() ||
            i % kOuterBlockSize >= (BorderLeft() + Width()));
  }

  // Fills guard bytes and seeds the input with alternating saturated and
  // random extreme values before every test.
  virtual void SetUp() {
    UUT_ = GET_PARAM(2);
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ != 0) {
      mask_ = (1 << UUT_->use_highbd_) - 1;
    } else {
      mask_ = 255;
    }
#endif
    /* Set up guard blocks for an inner block centered in the outer block */
    // NOTE(review): only the 8-bit output_ buffer gets guard values here;
    // output16_ is not guard-filled — confirm whether that is intentional.
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) {
        output_[i] = 255;
      } else {
        output_[i] = 0;
      }
    }

    ::libvpx_test::ACMRandom prng;
    for (int i = 0; i < kInputBufferSize; ++i) {
      if (i & 1) {
        input_[i] = 255;
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = mask_;
#endif
      } else {
        input_[i] = prng.Rand8Extremes();
#if CONFIG_VP9_HIGHBITDEPTH
        input16_[i] = prng.Rand16() & mask_;
#endif
      }
    }
  }

  // Overwrites the whole input with a single value (both bit depths).
  void SetConstantInput(int value) {
    memset(input_, value, kInputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_memset16(input16_, value, kInputBufferSize);
#endif
  }

  // Snapshots the current output so averaging tests can compare against it.
  void CopyOutputToRef() {
    memcpy(output_ref_, output_, kOutputBufferSize);
#if CONFIG_VP9_HIGHBITDEPTH
    memcpy(output16_ref_, output16_,
           kOutputBufferSize * sizeof(output16_ref_[0]));
#endif
  }

  // Fails the test if any guard byte was overwritten.
  // NOTE(review): checks only the 8-bit output_ buffer, mirroring SetUp().
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i)) EXPECT_EQ(255, output_[i]);
    }
  }

  // Pointer to the inner block of the input, in whichever buffer matches
  // the bit depth under test.
  uint8_t *input() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return input_ + offset;
    } else {
      return CAST_TO_BYTEPTR(input16_ + offset);
    }
#else
    return input_ + offset;
#endif
  }

  // Pointer to the inner block of the output buffer.
  uint8_t *output() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return output_ + offset;
    } else {
      return CAST_TO_BYTEPTR(output16_ + offset);
    }
#else
    return output_ + offset;
#endif
  }

  // Pointer to the inner block of the reference output buffer.
  uint8_t *output_ref() const {
    const int offset = BorderTop() * kOuterBlockSize + BorderLeft();
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return output_ref_ + offset;
    } else {
      return CAST_TO_BYTEPTR(output16_ref_ + offset);
    }
#else
    return output_ref_ + offset;
#endif
  }

  // Bit-depth-aware element read: |list| may really be a uint16_t array.
  uint16_t lookup(uint8_t *list, int index) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      return list[index];
    } else {
      return CAST_TO_SHORTPTR(list)[index];
    }
#else
    return list[index];
#endif
  }

  // Bit-depth-aware element write, counterpart of lookup().
  void assign_val(uint8_t *list, int index, uint16_t val) const {
#if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
      list[index] = (uint8_t)val;
    } else {
      CAST_TO_SHORTPTR(list)[index] = val;
    }
#else
    list[index] = (uint8_t)val;
#endif
  }

  const ConvolveFunctions *UUT_;  // function set under test
  static uint8_t *input_;
  static uint8_t *output_;
  static uint8_t *output_ref_;
#if CONFIG_VP9_HIGHBITDEPTH
  static uint16_t *input16_;
  static uint16_t *output16_;
  static uint16_t *output16_ref_;
  int mask_;  // (1 << bitdepth) - 1, used to clamp random samples
#endif
};
531 
// Out-of-line storage for the fixture's static buffers; allocated in
// SetUpTestCase() and released in TearDownTestCase().
uint8_t *ConvolveTest::input_ = NULL;
uint8_t *ConvolveTest::output_ = NULL;
uint8_t *ConvolveTest::output_ref_ = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t *ConvolveTest::input16_ = NULL;
uint16_t *ConvolveTest::output16_ = NULL;
uint16_t *ConvolveTest::output16_ref_ = NULL;
#endif
540 
TEST_P(ConvolveTest,GuardBlocks)541 TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); }
542 
// Benchmark (disabled by default): time the plain copy function over a
// fixed number of iterations and print the elapsed microseconds.
TEST_P(ConvolveTest, DISABLED_Copy_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  vpx_usec_timer_start(&timer);
  for (int i = 0; i < kNumTests; ++i) {
    UUT_->copy_[0](src, kInputStride, dst, kOutputStride, NULL, 0, NULL, 0,
                   width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("convolve_copy_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}
562 
// Benchmark (disabled by default): time the copy-with-average function
// over a fixed number of iterations and print the elapsed microseconds.
TEST_P(ConvolveTest, DISABLED_Avg_Speed) {
  const uint8_t *const src = input();
  uint8_t *const dst = output();
  const int kNumTests = 5000000;
  const int width = Width();
  const int height = Height();
  vpx_usec_timer timer;

  vpx_usec_timer_start(&timer);
  for (int i = 0; i < kNumTests; ++i) {
    UUT_->copy_[1](src, kInputStride, dst, kOutputStride, NULL, 0, NULL, 0,
                   width, height);
  }
  vpx_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
  printf("convolve_avg_%dx%d_%d: %d us\n", width, height,
         UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time);
}
582 
// The plain copy function must reproduce the input block exactly.
TEST_P(ConvolveTest, Copy) {
  uint8_t *const src = input();
  uint8_t *const dst = output();

  ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](src, kInputStride, dst, kOutputStride,
                                          NULL, 0, NULL, 0, Width(), Height()));

  CheckGuardBlocks();

  for (int row = 0; row < Height(); ++row) {
    for (int col = 0; col < Width(); ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                lookup(src, row * kInputStride + col))
          << "(" << col << "," << row << ")";
    }
  }
}
599 
// The averaging copy must produce the rounded mean of the input pixel and
// the pre-existing output value.
TEST_P(ConvolveTest, Avg) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  uint8_t *const dst_ref = output_ref();
  CopyOutputToRef();  // snapshot the previous output for comparison

  ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](src, kInputStride, dst, kOutputStride,
                                          NULL, 0, NULL, 0, Width(), Height()));

  CheckGuardBlocks();

  for (int row = 0; row < Height(); ++row) {
    for (int col = 0; col < Width(); ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                ROUND_POWER_OF_TWO(
                    lookup(src, row * kInputStride + col) +
                        lookup(dst_ref, row * kOutputStride + col),
                    1))
          << "(" << col << "," << row << ")";
    }
  }
}
620 
// A single-tap (value 128) kernel through the scaled-horizontal path must
// behave as an exact copy of the input.
TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

  ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](src, kInputStride, dst, kOutputStride,
                                         filter8, 16, filter8, 16, Width(),
                                         Height()));

  CheckGuardBlocks();

  for (int row = 0; row < Height(); ++row) {
    for (int col = 0; col < Width(); ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                lookup(src, row * kInputStride + col))
          << "(" << col << "," << row << ")";
    }
  }
}
640 
// A single-tap (value 128) kernel through the scaled-vertical path must
// behave as an exact copy of the input.
TEST_P(ConvolveTest, CopyVert) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

  ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](src, kInputStride, dst, kOutputStride,
                                         filter8, 16, filter8, 16, Width(),
                                         Height()));

  CheckGuardBlocks();

  for (int row = 0; row < Height(); ++row) {
    for (int col = 0; col < Width(); ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                lookup(src, row * kInputStride + col))
          << "(" << col << "," << row << ")";
    }
  }
}
660 
// A single-tap (value 128) kernel through the scaled 2-D path must behave
// as an exact copy of the input.
TEST_P(ConvolveTest, Copy2D) {
  uint8_t *const src = input();
  uint8_t *const dst = output();
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

  ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](src, kInputStride, dst, kOutputStride,
                                          filter8, 16, filter8, 16, Width(),
                                          Height()));

  CheckGuardBlocks();

  for (int row = 0; row < Height(); ++row) {
    for (int col = 0; col < Width(); ++col) {
      ASSERT_EQ(lookup(dst, row * kOutputStride + col),
                lookup(src, row * kInputStride + col))
          << "(" << col << "," << row << ")";
    }
  }
}
680 
681 const int kNumFilterBanks = 4;
682 const int kNumFilters = 16;
683 
TEST(ConvolveTest,FiltersWontSaturateWhenAddedPairwise)684 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
685   for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
686     const InterpKernel *filters =
687         vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
688     for (int i = 0; i < kNumFilters; i++) {
689       const int p0 = filters[i][0] + filters[i][1];
690       const int p1 = filters[i][2] + filters[i][3];
691       const int p2 = filters[i][4] + filters[i][5];
692       const int p3 = filters[i][6] + filters[i][7];
693       EXPECT_LE(p0, 128);
694       EXPECT_LE(p1, 128);
695       EXPECT_LE(p2, 128);
696       EXPECT_LE(p3, 128);
697       EXPECT_LE(p0 + p3, 128);
698       EXPECT_LE(p0 + p3 + p1, 128);
699       EXPECT_LE(p0 + p3 + p1 + p2, 128);
700       EXPECT_EQ(p0 + p1 + p2 + p3, 128);
701     }
702   }
703 }
704 
// All-zero kernel passed for the direction a variant is not supposed to
// read; if an implementation consumes it the comparison below will fail.
const int16_t kInvalidFilter[8] = { 0 };
// Reference wrappers indexed like the ConvolveFunctions tables:
// [0] = plain filter, [1] = averaging filter.
const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {
  wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
};
709 
// Exhaustive check: for both variants (i = 0 plain, i = 1 averaging), every
// filter bank, and every (filter_x, filter_y) subpel combination, the
// function under test must match the C reference wrapper bit-exactly.
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
  for (int i = 0; i < 2; ++i) {
    uint8_t *const in = input();
    uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
    // Pick the reference buffer matching the bit depth under test.
    uint8_t ref8[kOutputStride * kMaxDimension];
    uint16_t ref16[kOutputStride * kMaxDimension];
    uint8_t *ref;
    if (UUT_->use_highbd_ == 0) {
      ref = ref8;
    } else {
      ref = CAST_TO_BYTEPTR(ref16);
    }
#else
    uint8_t ref[kOutputStride * kMaxDimension];
#endif

    // Populate ref and out with some random data
    ::libvpx_test::ACMRandom prng;
    for (int y = 0; y < Height(); ++y) {
      for (int x = 0; x < Width(); ++x) {
        uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
          r = prng.Rand8Extremes();
        } else {
          r = prng.Rand16() & mask_;
        }
#else
        r = prng.Rand8Extremes();
#endif

        // Identical seed data so the averaging variant starts from the
        // same previous-output values in both buffers.
        assign_val(out, y * kOutputStride + x, r);
        assign_val(ref, y * kOutputStride + x, r);
      }
    }

    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
      const InterpKernel *filters =
          vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];

      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
          // Expected output from the C reference.
          wrapper_filter_block2d_8[i](in, kInputStride, filters[filter_x],
                                      filters[filter_y], ref, kOutputStride,
                                      Width(), Height(), UUT_->use_highbd_);

          // Dispatch to the variant matching which subpel offsets are
          // non-zero; a zero offset direction gets kInvalidFilter, which a
          // correct implementation must not read.
          if (filter_x && filter_y)
            ASM_REGISTER_STATE_CHECK(UUT_->hv8_[i](
                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                filters[filter_y], 16, Width(), Height()));
          else if (filter_y)
            ASM_REGISTER_STATE_CHECK(UUT_->v8_[i](
                in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
                filters[filter_y], 16, Width(), Height()));
          else if (filter_x)
            ASM_REGISTER_STATE_CHECK(UUT_->h8_[i](
                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                kInvalidFilter, 16, Width(), Height()));
          else
            ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](
                in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
                kInvalidFilter, 0, Width(), Height()));

          CheckGuardBlocks();

          for (int y = 0; y < Height(); ++y) {
            for (int x = 0; x < Width(); ++x)
              ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                        lookup(out, y * kOutputStride + x))
                  << "mismatch at (" << x << "," << y << "), "
                  << "filters (" << filter_bank << "," << filter_x << ","
                  << filter_y << ")";
          }
        }
      }
    }
  }
}
789 
// Drives each filter with 8x8 patches of extreme (0 / max) sample values in
// every on/off pattern selected by the bits of seed_val along one axis, and
// compares the unit under test against the C reference.  Only the
// non-averaging variants (index 0) are exercised here.
TEST_P(ConvolveTest, FilterExtremes) {
  uint8_t *const in = input();
  uint8_t *const out = output();
#if CONFIG_VP9_HIGHBITDEPTH
  // ref aliases whichever buffer matches the configured bit depth.
  uint8_t ref8[kOutputStride * kMaxDimension];
  uint16_t ref16[kOutputStride * kMaxDimension];
  uint8_t *ref;
  if (UUT_->use_highbd_ == 0) {
    ref = ref8;
  } else {
    ref = CAST_TO_BYTEPTR(ref16);
  }
#else
  uint8_t ref[kOutputStride * kMaxDimension];
#endif

  // Populate ref and out with some random data
  ::libvpx_test::ACMRandom prng;
  for (int y = 0; y < Height(); ++y) {
    for (int x = 0; x < Width(); ++x) {
      uint16_t r;
#if CONFIG_VP9_HIGHBITDEPTH
      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
      } else {
        r = prng.Rand16() & mask_;  // restrict to the configured bit depth
      }
#else
      r = prng.Rand8Extremes();
#endif
      assign_val(out, y * kOutputStride + x, r);
      assign_val(ref, y * kOutputStride + x, r);
    }
  }

  // axis selects whether the bit of seed_val that drives a sample comes from
  // its x or its y coordinate; the seed bookkeeping below walks seed_val
  // through all 256 patterns in either case.
  for (int axis = 0; axis < 2; axis++) {
    int seed_val = 0;
    while (seed_val < 256) {
      // Fill an 8x8 patch with 0 or the maximum sample value.  The offset of
      // SUBPEL_TAPS / 2 - 1 shifts the patch so it covers the 8-tap filter's
      // support region.
      // NOTE(review): in is indexed with kOutputStride here although it is
      // passed to the filters with kInputStride — presumably intentional (it
      // matches the upstream test), but confirm the two strides agree.
      for (int y = 0; y < 8; ++y) {
        for (int x = 0; x < 8; ++x) {
#if CONFIG_VP9_HIGHBITDEPTH
          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * mask_);
#else
          assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1,
                     ((seed_val >> (axis ? y : x)) & 1) * 255);
#endif
          if (axis) seed_val++;
        }
        if (axis) {
          seed_val -= 8;  // reuse the same seed for every row of the patch
        } else {
          seed_val++;
        }
      }
      if (axis) seed_val += 8;

      // Compare every filter bank / phase combination against the reference.
      for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
        const InterpKernel *filters =
            vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
        for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
            wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
                                       filters[filter_y], ref, kOutputStride,
                                       Width(), Height(), UUT_->use_highbd_);
            // Pick the UUT entry point matching the nonzero phases.
            if (filter_x && filter_y)
              ASM_REGISTER_STATE_CHECK(UUT_->hv8_[0](
                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                  filters[filter_y], 16, Width(), Height()));
            else if (filter_y)
              ASM_REGISTER_STATE_CHECK(UUT_->v8_[0](
                  in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
                  filters[filter_y], 16, Width(), Height()));
            else if (filter_x)
              ASM_REGISTER_STATE_CHECK(UUT_->h8_[0](
                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                  kInvalidFilter, 16, Width(), Height()));
            else
              ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](
                  in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
                  kInvalidFilter, 0, Width(), Height()));

            for (int y = 0; y < Height(); ++y) {
              for (int x = 0; x < Width(); ++x)
                ASSERT_EQ(lookup(ref, y * kOutputStride + x),
                          lookup(out, y * kOutputStride + x))
                    << "mismatch at (" << x << "," << y << "), "
                    << "filters (" << filter_bank << "," << filter_x << ","
                    << filter_y << ")";
            }
          }
        }
      }
    }
  }
}
886 
887 /* This test exercises that enough rows and columns are filtered with every
888    possible initial fractional positions and scaling steps. */
TEST_P(ConvolveTest,CheckScalingFiltering)889 TEST_P(ConvolveTest, CheckScalingFiltering) {
890   uint8_t *const in = input();
891   uint8_t *const out = output();
892   const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP];
893 
894   SetConstantInput(127);
895 
896   for (int frac = 0; frac < 16; ++frac) {
897     for (int step = 1; step <= 32; ++step) {
898       /* Test the horizontal and vertical filters in combination. */
899       ASM_REGISTER_STATE_CHECK(
900           UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap[frac],
901                          step, eighttap[frac], step, Width(), Height()));
902 
903       CheckGuardBlocks();
904 
905       for (int y = 0; y < Height(); ++y) {
906         for (int x = 0; x < Width(); ++x) {
907           ASSERT_EQ(lookup(in, y * kInputStride + x),
908                     lookup(out, y * kOutputStride + x))
909               << "x == " << x << ", y == " << y << ", frac == " << frac
910               << ", step == " << step;
911         }
912       }
913     }
914   }
915 }
916 
917 using std::tr1::make_tuple;
918 
#if CONFIG_VP9_HIGHBITDEPTH
// Adapter: wraps a vpx_highbd_* convolve function (uint16_t pointers plus an
// explicit bit depth argument) in the plain 8-bit ConvolveFunc signature the
// test harness uses.  The generated symbol is wrap_<func>_<bd>.
#define WRAP(func, bd)                                                         \
  void wrap_##func##_##bd(                                                     \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                  \
      ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride,      \
      const int16_t *filter_y, int filter_y_stride, int w, int h) {            \
    vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride,     \
                      reinterpret_cast<uint16_t *>(dst), dst_stride, filter_x, \
                      filter_x_stride, filter_y, filter_y_stride, w, h, bd);   \
  }

#if HAVE_SSE2 && ARCH_X86_64
// SSE2 high bit depth wrappers for 8-, 10- and 12-bit depths.
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve_copy_sse2, 10)
WRAP(convolve_avg_sse2, 10)
WRAP(convolve_copy_sse2, 12)
WRAP(convolve_avg_sse2, 12)
WRAP(convolve8_horiz_sse2, 8)
WRAP(convolve8_avg_horiz_sse2, 8)
WRAP(convolve8_vert_sse2, 8)
WRAP(convolve8_avg_vert_sse2, 8)
WRAP(convolve8_sse2, 8)
WRAP(convolve8_avg_sse2, 8)
WRAP(convolve8_horiz_sse2, 10)
WRAP(convolve8_avg_horiz_sse2, 10)
WRAP(convolve8_vert_sse2, 10)
WRAP(convolve8_avg_vert_sse2, 10)
WRAP(convolve8_sse2, 10)
WRAP(convolve8_avg_sse2, 10)
WRAP(convolve8_horiz_sse2, 12)
WRAP(convolve8_avg_horiz_sse2, 12)
WRAP(convolve8_vert_sse2, 12)
WRAP(convolve8_avg_vert_sse2, 12)
WRAP(convolve8_sse2, 12)
WRAP(convolve8_avg_sse2, 12)
#endif  // HAVE_SSE2 && ARCH_X86_64

#if HAVE_AVX2
// AVX2 high bit depth wrappers for 8-, 10- and 12-bit depths.
WRAP(convolve_copy_avx2, 8)
WRAP(convolve_avg_avx2, 8)
WRAP(convolve8_horiz_avx2, 8)
WRAP(convolve8_avg_horiz_avx2, 8)
WRAP(convolve8_vert_avx2, 8)
WRAP(convolve8_avg_vert_avx2, 8)
WRAP(convolve8_avx2, 8)
WRAP(convolve8_avg_avx2, 8)

WRAP(convolve_copy_avx2, 10)
WRAP(convolve_avg_avx2, 10)
WRAP(convolve8_avx2, 10)
WRAP(convolve8_horiz_avx2, 10)
WRAP(convolve8_vert_avx2, 10)
WRAP(convolve8_avg_avx2, 10)
WRAP(convolve8_avg_horiz_avx2, 10)
WRAP(convolve8_avg_vert_avx2, 10)

WRAP(convolve_copy_avx2, 12)
WRAP(convolve_avg_avx2, 12)
WRAP(convolve8_avx2, 12)
WRAP(convolve8_horiz_avx2, 12)
WRAP(convolve8_vert_avx2, 12)
WRAP(convolve8_avg_avx2, 12)
WRAP(convolve8_avg_horiz_avx2, 12)
WRAP(convolve8_avg_vert_avx2, 12)
#endif  // HAVE_AVX2

#if HAVE_NEON
// NEON high bit depth wrappers for 8-, 10- and 12-bit depths.
WRAP(convolve_copy_neon, 8)
WRAP(convolve_avg_neon, 8)
WRAP(convolve_copy_neon, 10)
WRAP(convolve_avg_neon, 10)
WRAP(convolve_copy_neon, 12)
WRAP(convolve_avg_neon, 12)
WRAP(convolve8_horiz_neon, 8)
WRAP(convolve8_avg_horiz_neon, 8)
WRAP(convolve8_vert_neon, 8)
WRAP(convolve8_avg_vert_neon, 8)
WRAP(convolve8_neon, 8)
WRAP(convolve8_avg_neon, 8)
WRAP(convolve8_horiz_neon, 10)
WRAP(convolve8_avg_horiz_neon, 10)
WRAP(convolve8_vert_neon, 10)
WRAP(convolve8_avg_vert_neon, 10)
WRAP(convolve8_neon, 10)
WRAP(convolve8_avg_neon, 10)
WRAP(convolve8_horiz_neon, 12)
WRAP(convolve8_avg_horiz_neon, 12)
WRAP(convolve8_vert_neon, 12)
WRAP(convolve8_avg_vert_neon, 12)
WRAP(convolve8_neon, 12)
WRAP(convolve8_avg_neon, 12)
#endif  // HAVE_NEON

// C reference high bit depth wrappers for 8-, 10- and 12-bit depths.
WRAP(convolve_copy_c, 8)
WRAP(convolve_avg_c, 8)
WRAP(convolve8_horiz_c, 8)
WRAP(convolve8_avg_horiz_c, 8)
WRAP(convolve8_vert_c, 8)
WRAP(convolve8_avg_vert_c, 8)
WRAP(convolve8_c, 8)
WRAP(convolve8_avg_c, 8)
WRAP(convolve_copy_c, 10)
WRAP(convolve_avg_c, 10)
WRAP(convolve8_horiz_c, 10)
WRAP(convolve8_avg_horiz_c, 10)
WRAP(convolve8_vert_c, 10)
WRAP(convolve8_avg_vert_c, 10)
WRAP(convolve8_c, 10)
WRAP(convolve8_avg_c, 10)
WRAP(convolve_copy_c, 12)
WRAP(convolve_avg_c, 12)
WRAP(convolve8_horiz_c, 12)
WRAP(convolve8_avg_horiz_c, 12)
WRAP(convolve8_vert_c, 12)
WRAP(convolve8_avg_vert_c, 12)
WRAP(convolve8_c, 12)
WRAP(convolve8_avg_c, 12)
#undef WRAP

// C reference function tables, one per bit depth.  The second group of
// entries (the scaled-convolve slots in the non-HBD build) reuses the
// non-scaled wrappers — presumably because no high bit depth scaled
// convolve variants exist; confirm against the ConvolveFunctions ctor.
const ConvolveFunctions convolve8_c(
    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8,
    wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_c(
    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10,
    wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10,
    wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_c(
    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12,
    wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12,
    wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
const ConvolveParam kArrayConvolve_c[] = {
  ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c)
};

#else
// Low bit depth build: use the vpx_dsp C functions directly; the scaled
// slots get the vpx_scaled_* C variants.
const ConvolveFunctions convolve8_c(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_c,
    vpx_convolve8_avg_horiz_c, vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c,
    vpx_convolve8_c, vpx_convolve8_avg_c, vpx_scaled_horiz_c,
    vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c,
    vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) };
#endif
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
1074 
#if HAVE_SSE2 && ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
// SSE2 tables, one per bit depth; the scaled-convolve slots reuse the
// non-scaled SSE2 wrappers, matching the layout of the C HBD tables.
const ConvolveFunctions convolve8_sse2(
    wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
const ConvolveFunctions convolve10_sse2(
    wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10,
    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
const ConvolveFunctions convolve12_sse2(
    wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12,
    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2),
                                              ALL_SIZES(convolve10_sse2),
                                              ALL_SIZES(convolve12_sse2) };
#else
// Low bit depth SSE2 table; scaled slots fall back to the C variants.
const ConvolveFunctions convolve8_sse2(
    vpx_convolve_copy_sse2, vpx_convolve_avg_sse2, vpx_convolve8_horiz_sse2,
    vpx_convolve8_avg_horiz_sse2, vpx_convolve8_vert_sse2,
    vpx_convolve8_avg_vert_sse2, vpx_convolve8_sse2, vpx_convolve8_avg_sse2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) };
#endif  // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_sse2));
#endif
1117 
#if HAVE_SSSE3
// SSSE3 table (low bit depth only); copy/avg and most scaled slots fall
// back to the C variants, vpx_scaled_2d_ssse3 covers the scaled 2-D case.
const ConvolveFunctions convolve8_ssse3(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_ssse3,
    vpx_convolve8_avg_horiz_ssse3, vpx_convolve8_vert_ssse3,
    vpx_convolve8_avg_vert_ssse3, vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) };
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_ssse3));
#endif
1130 
#if HAVE_AVX2
#if CONFIG_VP9_HIGHBITDEPTH
// AVX2 HBD tables; the scaled slots use the C reference wrappers.
const ConvolveFunctions convolve8_avx2(
    wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8,
    wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8,
    wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8,
    wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8,
    wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8,
    wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
const ConvolveFunctions convolve10_avx2(
    wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10,
    wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10,
    wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10,
    wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10,
    wrap_convolve8_avg_c_10, 10);
const ConvolveFunctions convolve12_avx2(
    wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12,
    wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12,
    wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12,
    wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12,
    wrap_convolve8_avg_c_12, 12);
const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2),
                                               ALL_SIZES(convolve10_avx2),
                                               ALL_SIZES(convolve12_avx2) };
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
#else   // !CONFIG_VP9_HIGHBITDEPTH
// NOTE(review): this table mixes avx2 and ssse3 symbols but is guarded only
// by HAVE_AVX2 — verify the ssse3 functions are still declared in a build
// with SSSE3 disabled but AVX2 enabled.
const ConvolveFunctions convolve8_avx2(
    vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_avx2,
    vpx_convolve8_avg_horiz_ssse3, vpx_convolve8_vert_avx2,
    vpx_convolve8_avg_vert_ssse3, vpx_convolve8_avx2, vpx_convolve8_avg_ssse3,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) };
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_avx2));
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // HAVE_AVX2
1173 
#if HAVE_NEON
#if CONFIG_VP9_HIGHBITDEPTH
// NEON HBD tables; scaled slots reuse the non-scaled NEON wrappers.
const ConvolveFunctions convolve8_neon(
    wrap_convolve_copy_neon_8, wrap_convolve_avg_neon_8,
    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8,
    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8, 8);
const ConvolveFunctions convolve10_neon(
    wrap_convolve_copy_neon_10, wrap_convolve_avg_neon_10,
    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
    wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10,
    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
    wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10, 10);
const ConvolveFunctions convolve12_neon(
    wrap_convolve_copy_neon_12, wrap_convolve_avg_neon_12,
    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12,
    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12, 12);
const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon),
                                              ALL_SIZES(convolve10_neon),
                                              ALL_SIZES(convolve12_neon) };
#else
// Low bit depth NEON table; scaled slots fall back to the C variants.
const ConvolveFunctions convolve8_neon(
    vpx_convolve_copy_neon, vpx_convolve_avg_neon, vpx_convolve8_horiz_neon,
    vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon,
    vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon) };
#endif  // CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_neon));
#endif  // HAVE_NEON
1216 
#if HAVE_DSPR2
// MIPS DSPr2 table (low bit depth); scaled slots use the C variants.
const ConvolveFunctions convolve8_dspr2(
    vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2, vpx_convolve8_horiz_dspr2,
    vpx_convolve8_avg_horiz_dspr2, vpx_convolve8_vert_dspr2,
    vpx_convolve8_avg_vert_dspr2, vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) };
INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_dspr2));
#endif  // HAVE_DSPR2
1229 
#if HAVE_MSA
// MIPS MSA table (low bit depth); scaled slots use the C variants.
const ConvolveFunctions convolve8_msa(
    vpx_convolve_copy_msa, vpx_convolve_avg_msa, vpx_convolve8_horiz_msa,
    vpx_convolve8_avg_horiz_msa, vpx_convolve8_vert_msa,
    vpx_convolve8_avg_vert_msa, vpx_convolve8_msa, vpx_convolve8_avg_msa,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) };
INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve8_msa));
#endif  // HAVE_MSA
1242 
#if HAVE_VSX
// POWER VSX table (low bit depth); scaled slots use the C variants.
const ConvolveFunctions convolve8_vsx(
    vpx_convolve_copy_vsx, vpx_convolve_avg_vsx, vpx_convolve8_horiz_vsx,
    vpx_convolve8_avg_horiz_vsx, vpx_convolve8_vert_vsx,
    vpx_convolve8_avg_vert_vsx, vpx_convolve8_vsx, vpx_convolve8_avg_vsx,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
const ConvolveParam kArrayConvolve_vsx[] = { ALL_SIZES(convolve8_vsx) };
INSTANTIATE_TEST_CASE_P(VSX, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_vsx));
#endif  // HAVE_VSX
1254 }  // namespace
1255