1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include "test/hiprec_convolve_test_util.h"
13
14 #include <memory>
15 #include <new>
16
17 #include "av1/common/restoration.h"
18
19 using std::make_tuple;
20 using std::tuple;
21
22 namespace libaom_test {
23
24 // Generate a random pair of filter kernels, using the ranges
25 // of possible values from the loop-restoration experiment
generate_kernels(ACMRandom * rnd,InterpKernel hkernel,InterpKernel vkernel,int kernel_type=2)26 static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
27 InterpKernel vkernel, int kernel_type = 2) {
28 if (kernel_type == 0) {
29 // Low possible values for filter coefficients
30 hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MINV;
31 hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MINV;
32 hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MINV;
33 hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
34 hkernel[7] = vkernel[7] = 0;
35 } else if (kernel_type == 1) {
36 // Max possible values for filter coefficients
37 hkernel[0] = hkernel[6] = vkernel[0] = vkernel[6] = WIENER_FILT_TAP0_MAXV;
38 hkernel[1] = hkernel[5] = vkernel[1] = vkernel[5] = WIENER_FILT_TAP1_MAXV;
39 hkernel[2] = hkernel[4] = vkernel[2] = vkernel[4] = WIENER_FILT_TAP2_MAXV;
40 hkernel[3] = vkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
41 hkernel[7] = vkernel[7] = 0;
42 } else {
43 // Randomly generated values for filter coefficients
44 hkernel[0] = hkernel[6] =
45 WIENER_FILT_TAP0_MINV +
46 rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
47 hkernel[1] = hkernel[5] =
48 WIENER_FILT_TAP1_MINV +
49 rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
50 hkernel[2] = hkernel[4] =
51 WIENER_FILT_TAP2_MINV +
52 rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
53 hkernel[3] = -2 * (hkernel[0] + hkernel[1] + hkernel[2]);
54 hkernel[7] = 0;
55
56 vkernel[0] = vkernel[6] =
57 WIENER_FILT_TAP0_MINV +
58 rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 2 - WIENER_FILT_TAP0_MINV);
59 vkernel[1] = vkernel[5] =
60 WIENER_FILT_TAP1_MINV +
61 rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
62 vkernel[2] = vkernel[4] =
63 WIENER_FILT_TAP2_MINV +
64 rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
65 vkernel[3] = -2 * (vkernel[0] + vkernel[1] + vkernel[2]);
66 vkernel[7] = 0;
67 }
68 }
69
70 namespace AV1HiprecConvolve {
71
BuildParams(hiprec_convolve_func filter)72 ::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
73 hiprec_convolve_func filter) {
74 const HiprecConvolveParam params[] = {
75 make_tuple(8, 8, 50000, filter), make_tuple(8, 4, 50000, filter),
76 make_tuple(64, 24, 1000, filter), make_tuple(64, 64, 1000, filter),
77 make_tuple(64, 56, 1000, filter), make_tuple(32, 8, 10000, filter),
78 make_tuple(32, 28, 10000, filter), make_tuple(32, 32, 10000, filter),
79 make_tuple(16, 34, 10000, filter), make_tuple(32, 34, 10000, filter),
80 make_tuple(64, 34, 1000, filter), make_tuple(8, 17, 10000, filter),
81 make_tuple(16, 17, 10000, filter), make_tuple(32, 17, 10000, filter)
82 };
83 return ::testing::ValuesIn(params);
84 }
85
~AV1HiprecConvolveTest()86 AV1HiprecConvolveTest::~AV1HiprecConvolveTest() {}
SetUp()87 void AV1HiprecConvolveTest::SetUp() {
88 rnd_.Reset(ACMRandom::DeterministicSeed());
89 }
90
TearDown()91 void AV1HiprecConvolveTest::TearDown() {}
92
RunCheckOutput(hiprec_convolve_func test_impl)93 void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
94 const int w = 128, h = 128;
95 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
96 const int num_iters = GET_PARAM(2);
97 int i, j, k, m;
98 const ConvolveParams conv_params = get_conv_params_wiener(8);
99
100 std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
101 ASSERT_NE(input_, nullptr);
102 uint8_t *input = input_.get();
103
104 // The AVX2 convolve functions always write rows with widths that are
105 // multiples of 16. So to avoid a buffer overflow, we may need to pad
106 // rows to a multiple of 16.
107 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
108 std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
109 ASSERT_NE(output, nullptr);
110 std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
111 ASSERT_NE(output2, nullptr);
112
113 // Generate random filter kernels
114 DECLARE_ALIGNED(16, InterpKernel, hkernel);
115 DECLARE_ALIGNED(16, InterpKernel, vkernel);
116
117 for (int kernel_type = 0; kernel_type < 3; kernel_type++) {
118 generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
119 for (i = 0; i < num_iters; ++i) {
120 for (k = 0; k < h; ++k)
121 for (m = 0; m < w; ++m) input[k * w + m] = rnd_.Rand8();
122 // Choose random locations within the source block
123 int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
124 int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
125 av1_wiener_convolve_add_src_c(input + offset_r * w + offset_c, w,
126 output.get(), out_w, hkernel, 16, vkernel,
127 16, out_w, out_h, &conv_params);
128 test_impl(input + offset_r * w + offset_c, w, output2.get(), out_w,
129 hkernel, 16, vkernel, 16, out_w, out_h, &conv_params);
130
131 for (j = 0; j < out_w * out_h; ++j)
132 ASSERT_EQ(output[j], output2[j])
133 << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
134 << (j / out_w) << ") on iteration " << i;
135 }
136 }
137 }
138
RunSpeedTest(hiprec_convolve_func test_impl)139 void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
140 const int w = 128, h = 128;
141 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
142 const int num_iters = GET_PARAM(2) / 500;
143 int i, j, k;
144 const ConvolveParams conv_params = get_conv_params_wiener(8);
145
146 std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
147 ASSERT_NE(input_, nullptr);
148 uint8_t *input = input_.get();
149
150 // The AVX2 convolve functions always write rows with widths that are
151 // multiples of 16. So to avoid a buffer overflow, we may need to pad
152 // rows to a multiple of 16.
153 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
154 std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
155 ASSERT_NE(output, nullptr);
156 std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
157 ASSERT_NE(output2, nullptr);
158
159 // Generate random filter kernels
160 DECLARE_ALIGNED(16, InterpKernel, hkernel);
161 DECLARE_ALIGNED(16, InterpKernel, vkernel);
162
163 generate_kernels(&rnd_, hkernel, vkernel);
164
165 for (i = 0; i < h; ++i)
166 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
167
168 aom_usec_timer ref_timer;
169 aom_usec_timer_start(&ref_timer);
170 for (i = 0; i < num_iters; ++i) {
171 for (j = 3; j < h - out_h - 4; j++) {
172 for (k = 3; k < w - out_w - 4; k++) {
173 av1_wiener_convolve_add_src_c(input + j * w + k, w, output.get(), out_w,
174 hkernel, 16, vkernel, 16, out_w, out_h,
175 &conv_params);
176 }
177 }
178 }
179 aom_usec_timer_mark(&ref_timer);
180 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
181
182 aom_usec_timer tst_timer;
183 aom_usec_timer_start(&tst_timer);
184 for (i = 0; i < num_iters; ++i) {
185 for (j = 3; j < h - out_h - 4; j++) {
186 for (k = 3; k < w - out_w - 4; k++) {
187 test_impl(input + j * w + k, w, output2.get(), out_w, hkernel, 16,
188 vkernel, 16, out_w, out_h, &conv_params);
189 }
190 }
191 }
192 aom_usec_timer_mark(&tst_timer);
193 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
194
195 std::cout << "[ ] C time = " << ref_time / 1000
196 << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
197
198 EXPECT_GT(ref_time, tst_time)
199 << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
200 << "C time: " << ref_time << " us\n"
201 << "SIMD time: " << tst_time << " us\n";
202 }
203 } // namespace AV1HiprecConvolve
204
205 #if CONFIG_AV1_HIGHBITDEPTH
206 namespace AV1HighbdHiprecConvolve {
207
BuildParams(highbd_hiprec_convolve_func filter)208 ::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
209 highbd_hiprec_convolve_func filter) {
210 const HighbdHiprecConvolveParam params[] = {
211 make_tuple(8, 8, 50000, 8, filter), make_tuple(64, 64, 1000, 8, filter),
212 make_tuple(32, 8, 10000, 8, filter), make_tuple(8, 8, 50000, 10, filter),
213 make_tuple(64, 64, 1000, 10, filter), make_tuple(32, 8, 10000, 10, filter),
214 make_tuple(8, 8, 50000, 12, filter), make_tuple(64, 64, 1000, 12, filter),
215 make_tuple(32, 8, 10000, 12, filter),
216 };
217 return ::testing::ValuesIn(params);
218 }
219
~AV1HighbdHiprecConvolveTest()220 AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() {}
SetUp()221 void AV1HighbdHiprecConvolveTest::SetUp() {
222 rnd_.Reset(ACMRandom::DeterministicSeed());
223 }
224
TearDown()225 void AV1HighbdHiprecConvolveTest::TearDown() {}
226
RunCheckOutput(highbd_hiprec_convolve_func test_impl)227 void AV1HighbdHiprecConvolveTest::RunCheckOutput(
228 highbd_hiprec_convolve_func test_impl) {
229 const int w = 128, h = 128;
230 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
231 const int num_iters = GET_PARAM(2);
232 const int bd = GET_PARAM(3);
233 int i, j;
234 const ConvolveParams conv_params = get_conv_params_wiener(bd);
235
236 std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
237 ASSERT_NE(input, nullptr);
238
239 // The AVX2 convolve functions always write rows with widths that are
240 // multiples of 16. So to avoid a buffer overflow, we may need to pad
241 // rows to a multiple of 16.
242 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
243 std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
244 ASSERT_NE(output, nullptr);
245 std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
246 ASSERT_NE(output2, nullptr);
247
248 // Generate random filter kernels
249 DECLARE_ALIGNED(16, InterpKernel, hkernel);
250 DECLARE_ALIGNED(16, InterpKernel, vkernel);
251
252 for (i = 0; i < h; ++i)
253 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
254
255 uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
256 uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
257 uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
258 for (int kernel_type = 0; kernel_type < 3; kernel_type++) {
259 generate_kernels(&rnd_, hkernel, vkernel, kernel_type);
260 for (i = 0; i < num_iters; ++i) {
261 // Choose random locations within the source block
262 int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
263 int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
264 av1_highbd_wiener_convolve_add_src_c(
265 input_ptr + offset_r * w + offset_c, w, output_ptr, out_w, hkernel,
266 16, vkernel, 16, out_w, out_h, &conv_params, bd);
267 test_impl(input_ptr + offset_r * w + offset_c, w, output2_ptr, out_w,
268 hkernel, 16, vkernel, 16, out_w, out_h, &conv_params, bd);
269
270 for (j = 0; j < out_w * out_h; ++j)
271 ASSERT_EQ(output[j], output2[j])
272 << "Pixel mismatch at index " << j << " = (" << (j % out_w) << ", "
273 << (j / out_w) << ") on iteration " << i;
274 }
275 }
276 }
277
RunSpeedTest(highbd_hiprec_convolve_func test_impl)278 void AV1HighbdHiprecConvolveTest::RunSpeedTest(
279 highbd_hiprec_convolve_func test_impl) {
280 const int w = 128, h = 128;
281 const int out_w = GET_PARAM(0), out_h = GET_PARAM(1);
282 const int num_iters = GET_PARAM(2) / 500;
283 const int bd = GET_PARAM(3);
284 int i, j, k;
285 const ConvolveParams conv_params = get_conv_params_wiener(bd);
286
287 std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
288 ASSERT_NE(input, nullptr);
289
290 // The AVX2 convolve functions always write rows with widths that are
291 // multiples of 16. So to avoid a buffer overflow, we may need to pad
292 // rows to a multiple of 16.
293 int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
294 std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
295 ASSERT_NE(output, nullptr);
296 std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
297 ASSERT_NE(output2, nullptr);
298
299 // Generate random filter kernels
300 DECLARE_ALIGNED(16, InterpKernel, hkernel);
301 DECLARE_ALIGNED(16, InterpKernel, vkernel);
302
303 generate_kernels(&rnd_, hkernel, vkernel);
304
305 for (i = 0; i < h; ++i)
306 for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
307
308 uint8_t *input_ptr = CONVERT_TO_BYTEPTR(input.get());
309 uint8_t *output_ptr = CONVERT_TO_BYTEPTR(output.get());
310 uint8_t *output2_ptr = CONVERT_TO_BYTEPTR(output2.get());
311
312 aom_usec_timer ref_timer;
313 aom_usec_timer_start(&ref_timer);
314 for (i = 0; i < num_iters; ++i) {
315 for (j = 3; j < h - out_h - 4; j++) {
316 for (k = 3; k < w - out_w - 4; k++) {
317 av1_highbd_wiener_convolve_add_src_c(
318 input_ptr + j * w + k, w, output_ptr, out_w, hkernel, 16, vkernel,
319 16, out_w, out_h, &conv_params, bd);
320 }
321 }
322 }
323 aom_usec_timer_mark(&ref_timer);
324 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
325
326 aom_usec_timer tst_timer;
327 aom_usec_timer_start(&tst_timer);
328 for (i = 0; i < num_iters; ++i) {
329 for (j = 3; j < h - out_h - 4; j++) {
330 for (k = 3; k < w - out_w - 4; k++) {
331 test_impl(input_ptr + j * w + k, w, output2_ptr, out_w, hkernel, 16,
332 vkernel, 16, out_w, out_h, &conv_params, bd);
333 }
334 }
335 }
336 aom_usec_timer_mark(&tst_timer);
337 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
338
339 std::cout << "[ ] C time = " << ref_time / 1000
340 << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
341
342 EXPECT_GT(ref_time, tst_time)
343 << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
344 << "C time: " << ref_time << " us\n"
345 << "SIMD time: " << tst_time << " us\n";
346 }
347 } // namespace AV1HighbdHiprecConvolve
348 #endif // CONFIG_AV1_HIGHBITDEPTH
349 } // namespace libaom_test
350