• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include "test/hiprec_convolve_test_util.h"
13 
14 #include <memory>
15 #include <new>
16 
17 #include "av1/common/restoration.h"
18 
19 using std::make_tuple;
20 using std::tuple;
21 
22 namespace libaom_test {
23 
24 // Generate a random pair of filter kernels, using the ranges
25 // of possible values from the loop-restoration experiment
generate_kernels(ACMRandom * rnd,InterpKernel hkernel,InterpKernel vkernel,int kernel_type=2)26 static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
27                              InterpKernel vkernel, int kernel_type = 2) {
28   if (kernel_type == 0) {
29     // Low possible values for filter coefficients
30     hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MINV;
31     hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
32     hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
33     hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
34     hkernel[7] = vkernel[7] = 0;
35   } else if (kernel_type == 1) {
36     // Max possible values for filter coefficients
37     hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MAXV;
38     hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
39     hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
40     hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
41     hkernel[7] = vkernel[7] = 0;
42   } else {
43     // Randomly generated values for filter coefficients
44     hkernel[0] = hkernel[6] =
45         WIENER_FILT_TAP0_MINV +
46         rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
47     hkernel[1] = hkernel[5] =
48         WIENER_FILT_TAP1_MINV +
49         rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
50     hkernel[2] = hkernel[4] =
51         WIENER_FILT_TAP2_MINV +
52         rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
53     hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
54     hkernel[7] = 0;
55 
56     vkernel[0] = vkernel[6] =
57         WIENER_FILT_TAP0_MINV +
58         rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 2 - WIENER_FILT_TAP0_MINV);
59     vkernel[1] = vkernel[5] =
60         WIENER_FILT_TAP1_MINV +
61         rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
62     vkernel[2] = vkernel[4] =
63         WIENER_FILT_TAP2_MINV +
64         rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
65     vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
66     vkernel[7] = 0;
67   }
68 }
69 
70 namespace AV1HiprecConvolve {
71 
BuildParams(hiprec_convolve_func filter)72 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
73     hiprec_convolve_func filter) {
74   const HiprecConvolveParam params[] = {
75     make_tuple(8, 8, 50000, filter),   make_tuple(8, 4, 50000, filter),
76     make_tuple(64, 24, 1000, filter),  make_tuple(64, 64, 1000, filter),
77     make_tuple(64, 56, 1000, filter),  make_tuple(32, 8, 10000, filter),
78     make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
79     make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
80     make_tuple(64, 34, 1000, filter),  make_tuple(8, 17, 10000, filter),
81     make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
82   };
83   return ::testing::ValuesIn(params);
84 }
85 
~AV1HiprecConvolveTest()86 AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {}
SetUp()87 void AV1HiprecConvolveTest::SetUp() {
88   rnd_.Reset(ACMRandom::DeterministicSeed());
89 }
90 
TearDown()91 void AV1HiprecConvolveTest::TearDown() {}
92 
RunCheckOutput(hiprec_convolve_func test_impl)93 void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
94   const int w = 128, h = 128;
95   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
96   const int num_iters = GET_PARAM(2);
97   int i, j, k, m;
98   const ConvolveParams conv_params = get_conv_params_wiener(8);
99 
100   std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
101   ASSERT_NE(input_, nullptr);
102   uint8_t *input = input_.get();
103 
104   // The AVX2 convolve functions always write rows with widths that are
105   // multiples of 16. So to avoid a buffer overflow, we may need to pad
106   // rows to a multiple of 16.
107   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
108   std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
109   ASSERT_NE(output, nullptr);
110   std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
111   ASSERT_NE(output2, nullptr);
112 
113   // Generate random filter kernels
114   DECLARE_ALIGNED(16, InterpKernel, hkernel);
115   DECLARE_ALIGNED(16, InterpKernel, vkernel);
116 
117   for (int kernel_type = 0; kernel_type < 3; kernel_type++) {
118     generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
119     for (i = 0; i < num_iters; ++i) {
120       for (k = 0; k < h; ++k)
121         for (m = 0; m < w; ++m) input[k * w + m] = rnd_.Rand8();
122       // Choose random locations within the source block
123       int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
124       int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
125       av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w,
126                                     output.get(), out_w, hkernel, 16, vkernel,
127                                     16, out_w, out_h, &conv_params);
128       test_impl(input + offset_r * w + offset_c, w, output2.get(), out_w,
129                 hkernel, 16, vkernel, 16, out_w, out_h, &conv_params);
130 
131       for (j = 0; j < out_w * out_h; ++j)
132         ASSERT_EQ(output[j], output2[j])
133             << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
134             << (j / out_w) << ") on iteration " << i;
135     }
136   }
137 }
138 
RunSpeedTest(hiprec_convolve_func test_impl)139 void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
140   const int w = 128, h = 128;
141   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
142   const int num_iters = GET_PARAM(2) / 500;
143   int i, j, k;
144   const ConvolveParams conv_params = get_conv_params_wiener(8);
145 
146   std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
147   ASSERT_NE(input_, nullptr);
148   uint8_t *input = input_.get();
149 
150   // The AVX2 convolve functions always write rows with widths that are
151   // multiples of 16. So to avoid a buffer overflow, we may need to pad
152   // rows to a multiple of 16.
153   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
154   std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
155   ASSERT_NE(output, nullptr);
156   std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
157   ASSERT_NE(output2, nullptr);
158 
159   // Generate random filter kernels
160   DECLARE_ALIGNED(16, InterpKernel, hkernel);
161   DECLARE_ALIGNED(16, InterpKernel, vkernel);
162 
163   generate_kernels(&rnd_, hkernel, vkernel);
164 
165   for (i = 0; i < h; ++i)
166     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
167 
168   aom_usec_timer ref_timer;
169   aom_usec_timer_start(&ref_timer);
170   for (i = 0; i < num_iters; ++i) {
171     for (j = 3; j < h - out_h - 4; j++) {
172       for (k = 3; k < w - out_w - 4; k++) {
173         av1_wiener_convolve_add_src_c(input + j * w + k, w, output.get(), out_w,
174                                       hkernel, 16, vkernel, 16, out_w, out_h,
175                                       &conv_params);
176       }
177     }
178   }
179   aom_usec_timer_mark(&ref_timer);
180   const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
181 
182   aom_usec_timer tst_timer;
183   aom_usec_timer_start(&tst_timer);
184   for (i = 0; i < num_iters; ++i) {
185     for (j = 3; j < h - out_h - 4; j++) {
186       for (k = 3; k < w - out_w - 4; k++) {
187         test_impl(input + j * w + k, w, output2.get(), out_w, hkernel, 16,
188                   vkernel, 16, out_w, out_h, &conv_params);
189       }
190     }
191   }
192   aom_usec_timer_mark(&tst_timer);
193   const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
194 
195   std::cout << "[          ] C time = " << ref_time / 1000
196             << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
197 
198   EXPECT_GT(ref_time, tst_time)
199       << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
200       << "C time: " << ref_time << " us\n"
201       << "SIMD time: " << tst_time << " us\n";
202 }
203 }  // namespace AV1HiprecConvolve
204 
205 #if CONFIG_AV1_HIGHBITDEPTH
206 namespace AV1HighbdHiprecConvolve {
207 
BuildParams(highbd_hiprec_convolve_func filter)208 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
209     highbd_hiprec_convolve_func filter) {
210   const HighbdHiprecConvolveParam params[] = {
211     make_tuple(8, 8, 50000, 8, filter),   make_tuple(64, 64, 1000, 8, filter),
212     make_tuple(32, 8, 10000, 8, filter),  make_tuple(8, 8, 50000, 10, filter),
213     make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
214     make_tuple(8, 8, 50000, 12, filter),  make_tuple(64, 64, 1000, 12, filter),
215     make_tuple(32, 8, 10000, 12, filter),
216   };
217   return ::testing::ValuesIn(params);
218 }
219 
~AV1HighbdHiprecConvolveTest()220 AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {}
SetUp()221 void AV1HighbdHiprecConvolveTest::SetUp() {
222   rnd_.Reset(ACMRandom::DeterministicSeed());
223 }
224 
TearDown()225 void AV1HighbdHiprecConvolveTest::TearDown() {}
226 
RunCheckOutput(highbd_hiprec_convolve_func test_impl)227 void AV1HighbdHiprecConvolveTest::RunCheckOutput(
228     highbd_hiprec_convolve_func test_impl) {
229   const int w = 128, h = 128;
230   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
231   const int num_iters = GET_PARAM(2);
232   const int bd = GET_PARAM(3);
233   int i, j;
234   const ConvolveParams conv_params = get_conv_params_wiener(bd);
235 
236   std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
237   ASSERT_NE(input, nullptr);
238 
239   // The AVX2 convolve functions always write rows with widths that are
240   // multiples of 16. So to avoid a buffer overflow, we may need to pad
241   // rows to a multiple of 16.
242   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
243   std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
244   ASSERT_NE(output, nullptr);
245   std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
246   ASSERT_NE(output2, nullptr);
247 
248   // Generate random filter kernels
249   DECLARE_ALIGNED(16, InterpKernel, hkernel);
250   DECLARE_ALIGNED(16, InterpKernel, vkernel);
251 
252   for (i = 0; i < h; ++i)
253     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
254 
255   uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
256   uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
257   uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
258   for (int kernel_type = 0; kernel_type < 3; kernel_type++) {
259     generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
260     for (i = 0; i < num_iters; ++i) {
261       // Choose random locations within the source block
262       int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
263       int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
264       av1_highbd_wiener_convolve_add_src_c(
265           input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel,
266           16, vkernel, 16, out_w, out_h, &conv_params, bd);
267       test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
268                 hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
269 
270       for (j = 0; j < out_w * out_h; ++j)
271         ASSERT_EQ(output[j], output2[j])
272             << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
273             << (j / out_w) << ") on iteration " << i;
274     }
275   }
276 }
277 
RunSpeedTest(highbd_hiprec_convolve_func test_impl)278 void AV1HighbdHiprecConvolveTest::RunSpeedTest(
279     highbd_hiprec_convolve_func test_impl) {
280   const int w = 128, h = 128;
281   const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
282   const int num_iters = GET_PARAM(2) / 500;
283   const int bd = GET_PARAM(3);
284   int i, j, k;
285   const ConvolveParams conv_params = get_conv_params_wiener(bd);
286 
287   std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
288   ASSERT_NE(input, nullptr);
289 
290   // The AVX2 convolve functions always write rows with widths that are
291   // multiples of 16. So to avoid a buffer overflow, we may need to pad
292   // rows to a multiple of 16.
293   int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
294   std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
295   ASSERT_NE(output, nullptr);
296   std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
297   ASSERT_NE(output2, nullptr);
298 
299   // Generate random filter kernels
300   DECLARE_ALIGNED(16, InterpKernel, hkernel);
301   DECLARE_ALIGNED(16, InterpKernel, vkernel);
302 
303   generate_kernels(&rnd_, hkernel, vkernel);
304 
305   for (i = 0; i < h; ++i)
306     for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
307 
308   uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
309   uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
310   uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
311 
312   aom_usec_timer ref_timer;
313   aom_usec_timer_start(&ref_timer);
314   for (i = 0; i < num_iters; ++i) {
315     for (j = 3; j < h - out_h - 4; j++) {
316       for (k = 3; k < w - out_w - 4; k++) {
317         av1_highbd_wiener_convolve_add_src_c(
318             input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
319             16, out_w, out_h, &conv_params, bd);
320       }
321     }
322   }
323   aom_usec_timer_mark(&ref_timer);
324   const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
325 
326   aom_usec_timer tst_timer;
327   aom_usec_timer_start(&tst_timer);
328   for (i = 0; i < num_iters; ++i) {
329     for (j = 3; j < h - out_h - 4; j++) {
330       for (k = 3; k < w - out_w - 4; k++) {
331         test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
332                   vkernel, 16, out_w, out_h, &conv_params, bd);
333       }
334     }
335   }
336   aom_usec_timer_mark(&tst_timer);
337   const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
338 
339   std::cout << "[          ] C time = " << ref_time / 1000
340             << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
341 
342   EXPECT_GT(ref_time, tst_time)
343       << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
344       << "C time: " << ref_time << " us\n"
345       << "SIMD time: " << tst_time << " us\n";
346 }
347 }  // namespace AV1HighbdHiprecConvolve
348 #endif  // CONFIG_AV1_HIGHBITDEPTH
349 }  // namespace libaom_test
350