1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include "test/av1_convolve_2d_test_util.h"
13
14 #include "aom_ports/aom_timer.h"
15 #include "av1/common/common_data.h"
16 #include "av1/common/convolve.h"
17
18 using std::make_tuple;
19 using std::tuple;
20
21 namespace libaom_test {
22
// Side length, in samples, of the square source buffers the tests below
// allocate (kMaxSize * kMaxSize elements): 128 plus 32 samples of padding.
constexpr int kMaxSize = 128 + 32;
24 namespace AV1Convolve2D {
25
BuildParams(convolve_2d_func filter,int has_subx,int has_suby)26 ::testing::internal::ParamGenerator<Convolve2DParam> BuildParams(
27 convolve_2d_func filter, int has_subx, int has_suby) {
28 return ::testing::Combine(::testing::Values(filter),
29 ::testing::Values(has_subx),
30 ::testing::Values(has_suby),
31 ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
32 }
33
~AV1Convolve2DSrTest()34 AV1Convolve2DSrTest::~AV1Convolve2DSrTest() {}
SetUp()35 void AV1Convolve2DSrTest::SetUp() {
36 rnd_.Reset(ACMRandom::DeterministicSeed());
37 }
38
TearDown()39 void AV1Convolve2DSrTest::TearDown() { libaom_test::ClearSystemState(); }
40
RunCheckOutput(convolve_2d_func test_impl)41 void AV1Convolve2DSrTest::RunCheckOutput(convolve_2d_func test_impl) {
42 const int w = kMaxSize, h = kMaxSize;
43 const int has_subx = GET_PARAM(1);
44 const int has_suby = GET_PARAM(2);
45 const int block_idx = GET_PARAM(3);
46 int hfilter, vfilter, subx, suby;
47 uint8_t input[kMaxSize * kMaxSize];
48 DECLARE_ALIGNED(32, uint8_t, output[MAX_SB_SQUARE]);
49 DECLARE_ALIGNED(32, uint8_t, output2[MAX_SB_SQUARE]);
50
51 for (int i = 0; i < h; ++i)
52 for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
53 for (int i = 0; i < MAX_SB_SQUARE; ++i)
54 output[i] = output2[i] = static_cast<uint8_t>(rnd_.Rand31());
55
56 // Make sure that sizes 2xN and Nx2 are also tested for chroma.
57 const int num_sizes =
58 (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
59 : 1;
60 for (int shift = 0; shift < num_sizes; ++shift) { // luma and chroma
61 const int out_w = block_size_wide[block_idx] >> shift;
62 const int out_h = block_size_high[block_idx] >> shift;
63 for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
64 for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL;
65 ++vfilter) {
66 const InterpFilterParams *filter_params_x =
67 av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
68 out_w);
69 const InterpFilterParams *filter_params_y =
70 av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
71 out_h);
72 for (int do_average = 0; do_average < 1; ++do_average) {
73 ConvolveParams conv_params1 =
74 get_conv_params_no_round(do_average, 0, NULL, 0, 0, 8);
75 ConvolveParams conv_params2 =
76 get_conv_params_no_round(do_average, 0, NULL, 0, 0, 8);
77
78 const int subx_range = has_subx ? 16 : 1;
79 const int suby_range = has_suby ? 16 : 1;
80 for (subx = 0; subx < subx_range; ++subx) {
81 for (suby = 0; suby < suby_range; ++suby) {
82 // Choose random locations within the source block
83 const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
84 const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
85 av1_convolve_2d_sr_c(input + offset_r * w + offset_c, w, output,
86 MAX_SB_SIZE, out_w, out_h, filter_params_x,
87 filter_params_y, subx, suby, &conv_params1);
88 test_impl(input + offset_r * w + offset_c, w, output2,
89 MAX_SB_SIZE, out_w, out_h, filter_params_x,
90 filter_params_y, subx, suby, &conv_params2);
91
92 if (memcmp(output, output2, sizeof(output))) {
93 for (int i = 0; i < MAX_SB_SIZE; ++i) {
94 for (int j = 0; j < MAX_SB_SIZE; ++j) {
95 int idx = i * MAX_SB_SIZE + j;
96 ASSERT_EQ(output[idx], output2[idx])
97 << out_w << "x" << out_h << " Pixel mismatch at index "
98 << idx << " = (" << i << ", " << j
99 << "), sub pixel offset = (" << suby << ", " << subx
100 << ")";
101 }
102 }
103 }
104 }
105 }
106 }
107 }
108 }
109 }
110 }
111
RunSpeedTest(convolve_2d_func test_impl)112 void AV1Convolve2DSrTest::RunSpeedTest(convolve_2d_func test_impl) {
113 const int w = kMaxSize, h = kMaxSize;
114 const int has_subx = GET_PARAM(1);
115 const int has_suby = GET_PARAM(2);
116 const int block_idx = GET_PARAM(3);
117
118 uint8_t input[kMaxSize * kMaxSize];
119 DECLARE_ALIGNED(32, uint8_t, output[MAX_SB_SQUARE]);
120
121 for (int i = 0; i < h; ++i)
122 for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
123
124 int hfilter = EIGHTTAP_REGULAR, vfilter = EIGHTTAP_REGULAR;
125 int subx = 0, suby = 0;
126
127 const int do_average = 0;
128 ConvolveParams conv_params2 =
129 get_conv_params_no_round(do_average, 0, NULL, 0, 0, 8);
130
131 // Make sure that sizes 2xN and Nx2 are also tested for chroma.
132 const int num_sizes =
133 (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
134 : 1;
135 for (int shift = 0; shift < num_sizes; ++shift) { // luma and chroma
136 const int out_w = block_size_wide[block_idx] >> shift;
137 const int out_h = block_size_high[block_idx] >> shift;
138 const int num_loops = 1000000000 / (out_w + out_h);
139
140 const InterpFilterParams *filter_params_x =
141 av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
142 out_w);
143 const InterpFilterParams *filter_params_y =
144 av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
145 out_h);
146
147 aom_usec_timer timer;
148 aom_usec_timer_start(&timer);
149
150 for (int i = 0; i < num_loops; ++i)
151 test_impl(input, w, output, MAX_SB_SIZE, out_w, out_h, filter_params_x,
152 filter_params_y, subx, suby, &conv_params2);
153
154 aom_usec_timer_mark(&timer);
155 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
156 printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
157 out_h, 1000.0 * elapsed_time / num_loops);
158 }
159 }
160
~AV1JntConvolve2DTest()161 AV1JntConvolve2DTest::~AV1JntConvolve2DTest() {}
SetUp()162 void AV1JntConvolve2DTest::SetUp() {
163 rnd_.Reset(ACMRandom::DeterministicSeed());
164 }
165
TearDown()166 void AV1JntConvolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
167
RunCheckOutput(convolve_2d_func test_impl)168 void AV1JntConvolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
169 const int w = kMaxSize, h = kMaxSize;
170 const int has_subx = GET_PARAM(1);
171 const int has_suby = GET_PARAM(2);
172 const int block_idx = GET_PARAM(3);
173 int hfilter, vfilter, subx, suby;
174 uint8_t input[kMaxSize * kMaxSize];
175 DECLARE_ALIGNED(32, CONV_BUF_TYPE, output1[MAX_SB_SQUARE]);
176 DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
177 DECLARE_ALIGNED(16, uint8_t, output8_1[MAX_SB_SQUARE]);
178 DECLARE_ALIGNED(16, uint8_t, output8_2[MAX_SB_SQUARE]);
179
180 for (int i = 0; i < h; ++i)
181 for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
182 for (int i = 0; i < MAX_SB_SQUARE; ++i) {
183 output1[i] = output2[i] = rnd_.Rand16();
184 output8_1[i] = output8_2[i] = rnd_.Rand8();
185 }
186
187 const int out_w = block_size_wide[block_idx];
188 const int out_h = block_size_high[block_idx];
189 for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
190 for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
191 const InterpFilterParams *filter_params_x =
192 av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
193 out_w);
194 const InterpFilterParams *filter_params_y =
195 av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
196 out_h);
197 for (int do_average = 0; do_average <= 1; ++do_average) {
198 ConvolveParams conv_params1 =
199 get_conv_params_no_round(do_average, 0, output1, MAX_SB_SIZE, 1, 8);
200 ConvolveParams conv_params2 =
201 get_conv_params_no_round(do_average, 0, output2, MAX_SB_SIZE, 1, 8);
202
203 // Test special case where dist_wtd_comp_avg is not used
204 conv_params1.use_dist_wtd_comp_avg = 0;
205 conv_params2.use_dist_wtd_comp_avg = 0;
206
207 const int subx_range = has_subx ? 16 : 1;
208 const int suby_range = has_suby ? 16 : 1;
209 for (subx = 0; subx < subx_range; ++subx) {
210 for (suby = 0; suby < suby_range; ++suby) {
211 // Choose random locations within the source block
212 const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
213 const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
214 av1_dist_wtd_convolve_2d_c(input + offset_r * w + offset_c, w,
215 output8_1, MAX_SB_SIZE, out_w, out_h,
216 filter_params_x, filter_params_y, subx,
217 suby, &conv_params1);
218 test_impl(input + offset_r * w + offset_c, w, output8_2,
219 MAX_SB_SIZE, out_w, out_h, filter_params_x,
220 filter_params_y, subx, suby, &conv_params2);
221
222 for (int i = 0; i < out_h; ++i) {
223 for (int j = 0; j < out_w; ++j) {
224 int idx = i * MAX_SB_SIZE + j;
225 ASSERT_EQ(output1[idx], output2[idx])
226 << "Mismatch at unit tests for av1_dist_wtd_convolve_2d\n"
227 << out_w << "x" << out_h << " Pixel mismatch at index "
228 << idx << " = (" << i << ", " << j
229 << "), sub pixel offset = (" << suby << ", " << subx << ")";
230 }
231 }
232
233 if (memcmp(output8_1, output8_2, sizeof(output8_1))) {
234 for (int i = 0; i < MAX_SB_SIZE; ++i) {
235 for (int j = 0; j < MAX_SB_SIZE; ++j) {
236 int idx = i * MAX_SB_SIZE + j;
237 ASSERT_EQ(output8_1[idx], output8_2[idx])
238 << out_w << "x" << out_h << " Pixel mismatch at index "
239 << idx << " = (" << i << ", " << j
240 << "), sub pixel offset = (" << suby << ", " << subx
241 << ")";
242 }
243 }
244 }
245 }
246 }
247
248 // Test different combination of fwd and bck offset weights
249 for (int k = 0; k < 2; ++k) {
250 for (int l = 0; l < 4; ++l) {
251 conv_params1.use_dist_wtd_comp_avg = 1;
252 conv_params2.use_dist_wtd_comp_avg = 1;
253 conv_params1.fwd_offset = quant_dist_lookup_table[k][l][0];
254 conv_params1.bck_offset = quant_dist_lookup_table[k][l][1];
255 conv_params2.fwd_offset = quant_dist_lookup_table[k][l][0];
256 conv_params2.bck_offset = quant_dist_lookup_table[k][l][1];
257
258 for (subx = 0; subx < subx_range; ++subx) {
259 for (suby = 0; suby < suby_range; ++suby) {
260 // Choose random locations within the source block
261 const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
262 const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
263 av1_dist_wtd_convolve_2d_c(input + offset_r * w + offset_c, w,
264 output8_1, MAX_SB_SIZE, out_w, out_h,
265 filter_params_x, filter_params_y,
266 subx, suby, &conv_params1);
267 test_impl(input + offset_r * w + offset_c, w, output8_2,
268 MAX_SB_SIZE, out_w, out_h, filter_params_x,
269 filter_params_y, subx, suby, &conv_params2);
270
271 for (int i = 0; i < out_h; ++i) {
272 for (int j = 0; j < out_w; ++j) {
273 int idx = i * MAX_SB_SIZE + j;
274 ASSERT_EQ(output1[idx], output2[idx])
275 << "Mismatch at unit tests for "
276 "av1_dist_wtd_convolve_2d\n"
277 << out_w << "x" << out_h << " Pixel mismatch at index "
278 << idx << " = (" << i << ", " << j
279 << "), sub pixel offset = (" << suby << ", " << subx
280 << ")";
281 }
282 }
283 if (memcmp(output8_1, output8_2, sizeof(output8_1))) {
284 for (int i = 0; i < MAX_SB_SIZE; ++i) {
285 for (int j = 0; j < MAX_SB_SIZE; ++j) {
286 int idx = i * MAX_SB_SIZE + j;
287 ASSERT_EQ(output8_1[idx], output8_2[idx])
288 << out_w << "x" << out_h
289 << " Pixel mismatch at index " << idx << " = (" << i
290 << ", " << j << "), sub pixel offset = (" << suby
291 << ", " << subx << ")";
292 }
293 }
294 }
295 }
296 }
297 }
298 }
299 }
300 }
301 }
302 }
303
RunSpeedTest(convolve_2d_func test_impl)304 void AV1JntConvolve2DTest::RunSpeedTest(convolve_2d_func test_impl) {
305 const int w = kMaxSize, h = kMaxSize;
306 const int has_subx = GET_PARAM(1);
307 const int has_suby = GET_PARAM(2);
308 const int block_idx = GET_PARAM(3);
309
310 int subx = 0, suby = 0;
311 uint8_t input[kMaxSize * kMaxSize];
312 DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
313 DECLARE_ALIGNED(16, uint8_t, output8[MAX_SB_SQUARE]);
314 int hfilter = EIGHTTAP_REGULAR, vfilter = EIGHTTAP_REGULAR;
315 for (int i = 0; i < h; ++i)
316 for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
317 for (int i = 0; i < MAX_SB_SQUARE; ++i) {
318 output[i] = rnd_.Rand16();
319 output8[i] = rnd_.Rand8();
320 }
321
322 const int out_w = block_size_wide[block_idx];
323 const int out_h = block_size_high[block_idx];
324 const int num_loops = 1000000000 / (out_w + out_h);
325 const int do_average = 0;
326
327 const InterpFilterParams *filter_params_x =
328 av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
329 out_w);
330 const InterpFilterParams *filter_params_y =
331 av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
332 out_h);
333
334 ConvolveParams conv_params =
335 get_conv_params_no_round(do_average, 0, output, MAX_SB_SIZE, 1, 8);
336
337 conv_params.use_dist_wtd_comp_avg = 0;
338
339 // Choose random locations within the source block
340 const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
341 const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
342
343 aom_usec_timer timer;
344 aom_usec_timer_start(&timer);
345
346 for (int i = 0; i < num_loops; ++i)
347 test_impl(input + offset_r * w + offset_c, w, output8, MAX_SB_SIZE, out_w,
348 out_h, filter_params_x, filter_params_y, subx, suby,
349 &conv_params);
350
351 aom_usec_timer_mark(&timer);
352 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
353 printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
354 out_h, 1000.0 * elapsed_time / num_loops);
355 }
356 } // namespace AV1Convolve2D
357
358 #if CONFIG_AV1_HIGHBITDEPTH
359 namespace AV1HighbdConvolve2D {
BuildParams(highbd_convolve_2d_func filter,int has_subx,int has_suby)360 ::testing::internal::ParamGenerator<HighbdConvolve2DParam> BuildParams(
361 highbd_convolve_2d_func filter, int has_subx, int has_suby) {
362 return ::testing::Combine(
363 ::testing::Range(8, 13, 2), ::testing::Values(filter),
364 ::testing::Values(has_subx), ::testing::Values(has_suby),
365 ::testing::Range(BLOCK_4X4, BLOCK_SIZES_ALL));
366 }
367
~AV1HighbdConvolve2DSrTest()368 AV1HighbdConvolve2DSrTest::~AV1HighbdConvolve2DSrTest() {}
SetUp()369 void AV1HighbdConvolve2DSrTest::SetUp() {
370 rnd_.Reset(ACMRandom::DeterministicSeed());
371 }
372
TearDown()373 void AV1HighbdConvolve2DSrTest::TearDown() { libaom_test::ClearSystemState(); }
374
RunSpeedTest(highbd_convolve_2d_func test_impl)375 void AV1HighbdConvolve2DSrTest::RunSpeedTest(
376 highbd_convolve_2d_func test_impl) {
377 const int w = kMaxSize, h = kMaxSize;
378 const int bd = GET_PARAM(0);
379 const int has_subx = GET_PARAM(2);
380 const int has_suby = GET_PARAM(3);
381 const int block_idx = GET_PARAM(4);
382 int hfilter, vfilter, subx, suby;
383 uint16_t input[kMaxSize * kMaxSize];
384 DECLARE_ALIGNED(32, uint16_t, output[MAX_SB_SQUARE]);
385
386 for (int i = 0; i < h; ++i)
387 for (int j = 0; j < w; ++j)
388 input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
389
390 hfilter = EIGHTTAP_REGULAR;
391 vfilter = EIGHTTAP_REGULAR;
392 int do_average = 0;
393
394 const int offset_r = 3;
395 const int offset_c = 3;
396 subx = 0;
397 suby = 0;
398
399 ConvolveParams conv_params =
400 get_conv_params_no_round(do_average, 0, NULL, 0, 0, bd);
401
402 // Make sure that sizes 2xN and Nx2 are also tested for chroma.
403 const int num_sizes =
404 (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
405 : 1;
406
407 for (int shift = 0; shift < num_sizes; ++shift) { // luma and chroma
408 const int out_w = block_size_wide[block_idx] >> shift;
409 const int out_h = block_size_high[block_idx] >> shift;
410 const int num_loops = 1000000000 / (out_w + out_h);
411
412 const InterpFilterParams *filter_params_x =
413 av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
414 out_w);
415 const InterpFilterParams *filter_params_y =
416 av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
417 out_h);
418
419 aom_usec_timer timer;
420 aom_usec_timer_start(&timer);
421 for (int i = 0; i < num_loops; ++i)
422 test_impl(input + offset_r * w + offset_c, w, output, MAX_SB_SIZE, out_w,
423 out_h, filter_params_x, filter_params_y, subx, suby,
424 &conv_params, bd);
425
426 aom_usec_timer_mark(&timer);
427 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
428 printf("%d,%d convolve %3dx%-3d: %7.2f us\n", has_subx, has_suby, out_w,
429 out_h, 1000.0 * elapsed_time / num_loops);
430 }
431 }
432
RunCheckOutput(highbd_convolve_2d_func test_impl)433 void AV1HighbdConvolve2DSrTest::RunCheckOutput(
434 highbd_convolve_2d_func test_impl) {
435 const int w = kMaxSize, h = kMaxSize;
436 const int bd = GET_PARAM(0);
437 const int has_subx = GET_PARAM(2);
438 const int has_suby = GET_PARAM(3);
439 const int block_idx = GET_PARAM(4);
440 int hfilter, vfilter, subx, suby;
441 uint16_t input[kMaxSize * kMaxSize];
442 DECLARE_ALIGNED(32, uint16_t, output[MAX_SB_SQUARE]);
443 DECLARE_ALIGNED(32, uint16_t, output2[MAX_SB_SQUARE]);
444
445 for (int i = 0; i < h; ++i)
446 for (int j = 0; j < w; ++j)
447 input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
448 for (int i = 0; i < MAX_SB_SQUARE; ++i)
449 output[i] = output2[i] = static_cast<int16_t>(rnd_.Rand31());
450
451 // Make sure that sizes 2xN and Nx2 are also tested for chroma.
452 const int num_sizes =
453 (block_size_wide[block_idx] == 4 || block_size_high[block_idx] == 4) ? 2
454 : 1;
455 for (int shift = 0; shift < num_sizes; ++shift) { // luma and chroma
456 const int out_w = block_size_wide[block_idx] >> shift;
457 const int out_h = block_size_high[block_idx] >> shift;
458 for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
459 for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL;
460 ++vfilter) {
461 const InterpFilterParams *filter_params_x =
462 av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
463 out_w);
464 const InterpFilterParams *filter_params_y =
465 av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
466 out_h);
467 for (int do_average = 0; do_average < 1; ++do_average) {
468 ConvolveParams conv_params1 =
469 get_conv_params_no_round(do_average, 0, NULL, 0, 0, bd);
470 ConvolveParams conv_params2 =
471 get_conv_params_no_round(do_average, 0, NULL, 0, 0, bd);
472
473 const int subx_range = has_subx ? 16 : 1;
474 const int suby_range = has_suby ? 16 : 1;
475 for (subx = 0; subx < subx_range; ++subx) {
476 for (suby = 0; suby < suby_range; ++suby) {
477 // Choose random locations within the source block
478 const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
479 const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
480 av1_highbd_convolve_2d_sr_c(input + offset_r * w + offset_c, w,
481 output, MAX_SB_SIZE, out_w, out_h,
482 filter_params_x, filter_params_y,
483 subx, suby, &conv_params1, bd);
484 test_impl(input + offset_r * w + offset_c, w, output2,
485 MAX_SB_SIZE, out_w, out_h, filter_params_x,
486 filter_params_y, subx, suby, &conv_params2, bd);
487
488 if (memcmp(output, output2, sizeof(output))) {
489 for (int i = 0; i < MAX_SB_SIZE; ++i) {
490 for (int j = 0; j < MAX_SB_SIZE; ++j) {
491 int idx = i * MAX_SB_SIZE + j;
492 ASSERT_EQ(output[idx], output2[idx])
493 << out_w << "x" << out_h << " Pixel mismatch at index "
494 << idx << " = (" << i << ", " << j
495 << "), sub pixel offset = (" << suby << ", " << subx
496 << ")";
497 }
498 }
499 }
500 }
501 }
502 }
503 }
504 }
505 }
506 }
507
~AV1HighbdJntConvolve2DTest()508 AV1HighbdJntConvolve2DTest::~AV1HighbdJntConvolve2DTest() {}
SetUp()509 void AV1HighbdJntConvolve2DTest::SetUp() {
510 rnd_.Reset(ACMRandom::DeterministicSeed());
511 }
512
TearDown()513 void AV1HighbdJntConvolve2DTest::TearDown() { libaom_test::ClearSystemState(); }
514
RunSpeedTest(highbd_convolve_2d_func test_impl)515 void AV1HighbdJntConvolve2DTest::RunSpeedTest(
516 highbd_convolve_2d_func test_impl) {
517 const int w = kMaxSize, h = kMaxSize;
518 const int bd = GET_PARAM(0);
519 const int block_idx = GET_PARAM(4);
520 int hfilter, vfilter, subx, suby;
521 uint16_t input[kMaxSize * kMaxSize];
522 DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
523 DECLARE_ALIGNED(32, uint16_t, output16[MAX_SB_SQUARE]);
524
525 for (int i = 0; i < h; ++i)
526 for (int j = 0; j < w; ++j)
527 input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
528 for (int i = 0; i < MAX_SB_SQUARE; ++i) output[i] = rnd_.Rand16();
529 hfilter = EIGHTTAP_REGULAR;
530 vfilter = EIGHTTAP_REGULAR;
531 int do_average = 0;
532 const int out_w = block_size_wide[block_idx];
533 const int out_h = block_size_high[block_idx];
534
535 const InterpFilterParams *filter_params_x =
536 av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
537 out_w);
538 const InterpFilterParams *filter_params_y =
539 av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
540 out_h);
541
542 ConvolveParams conv_params =
543 get_conv_params_no_round(do_average, 0, output, MAX_SB_SIZE, 1, bd);
544
545 // Test special case where dist_wtd_comp_avg is not used
546 conv_params.use_dist_wtd_comp_avg = 0;
547
548 subx = 0;
549 suby = 0;
550 // Choose random locations within the source block
551 const int offset_r = 3;
552 const int offset_c = 3;
553
554 const int num_loops = 1000000000 / (out_w + out_h);
555 aom_usec_timer timer;
556 aom_usec_timer_start(&timer);
557 for (int i = 0; i < num_loops; ++i)
558 test_impl(input + offset_r * w + offset_c, w, output16, MAX_SB_SIZE, out_w,
559 out_h, filter_params_x, filter_params_y, subx, suby, &conv_params,
560 bd);
561
562 aom_usec_timer_mark(&timer);
563 const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
564 printf("convolve %3dx%-3d: %7.2f us\n", out_w, out_h,
565 1000.0 * elapsed_time / num_loops);
566 }
567
RunCheckOutput(highbd_convolve_2d_func test_impl)568 void AV1HighbdJntConvolve2DTest::RunCheckOutput(
569 highbd_convolve_2d_func test_impl) {
570 const int w = kMaxSize, h = kMaxSize;
571 const int bd = GET_PARAM(0);
572 const int has_subx = GET_PARAM(2);
573 const int has_suby = GET_PARAM(3);
574 const int block_idx = GET_PARAM(4);
575 int hfilter, vfilter, subx, suby;
576 uint16_t input[kMaxSize * kMaxSize];
577 DECLARE_ALIGNED(32, CONV_BUF_TYPE, output1[MAX_SB_SQUARE]);
578 DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
579 DECLARE_ALIGNED(32, uint16_t, output16_1[MAX_SB_SQUARE]);
580 DECLARE_ALIGNED(32, uint16_t, output16_2[MAX_SB_SQUARE]);
581
582 for (int i = 0; i < h; ++i)
583 for (int j = 0; j < w; ++j)
584 input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
585 for (int i = 0; i < MAX_SB_SQUARE; ++i) {
586 output1[i] = output2[i] = rnd_.Rand16();
587 output16_1[i] = output16_2[i] = rnd_.Rand16();
588 }
589
590 const int out_w = block_size_wide[block_idx];
591 const int out_h = block_size_high[block_idx];
592 for (hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL; ++hfilter) {
593 for (vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL; ++vfilter) {
594 const InterpFilterParams *filter_params_x =
595 av1_get_interp_filter_params_with_block_size((InterpFilter)hfilter,
596 out_w);
597 const InterpFilterParams *filter_params_y =
598 av1_get_interp_filter_params_with_block_size((InterpFilter)vfilter,
599 out_h);
600 for (int do_average = 0; do_average <= 1; ++do_average) {
601 ConvolveParams conv_params1 = get_conv_params_no_round(
602 do_average, 0, output1, MAX_SB_SIZE, 1, bd);
603 ConvolveParams conv_params2 = get_conv_params_no_round(
604 do_average, 0, output2, MAX_SB_SIZE, 1, bd);
605
606 // Test special case where dist_wtd_comp_avg is not used
607 conv_params1.use_dist_wtd_comp_avg = 0;
608 conv_params2.use_dist_wtd_comp_avg = 0;
609
610 const int subx_range = has_subx ? 16 : 1;
611 const int suby_range = has_suby ? 16 : 1;
612 for (subx = 0; subx < subx_range; ++subx) {
613 for (suby = 0; suby < suby_range; ++suby) {
614 // Choose random locations within the source block
615 const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
616 const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
617 av1_highbd_dist_wtd_convolve_2d_c(
618 input + offset_r * w + offset_c, w, output16_1, MAX_SB_SIZE,
619 out_w, out_h, filter_params_x, filter_params_y, subx, suby,
620 &conv_params1, bd);
621 test_impl(input + offset_r * w + offset_c, w, output16_2,
622 MAX_SB_SIZE, out_w, out_h, filter_params_x,
623 filter_params_y, subx, suby, &conv_params2, bd);
624
625 for (int i = 0; i < out_h; ++i) {
626 for (int j = 0; j < out_w; ++j) {
627 int idx = i * MAX_SB_SIZE + j;
628 ASSERT_EQ(output1[idx], output2[idx])
629 << out_w << "x" << out_h << " Pixel mismatch at index "
630 << idx << " = (" << i << ", " << j
631 << "), sub pixel offset = (" << suby << ", " << subx << ")";
632 }
633 }
634
635 if (memcmp(output16_1, output16_2, sizeof(output16_1))) {
636 for (int i = 0; i < MAX_SB_SIZE; ++i) {
637 for (int j = 0; j < MAX_SB_SIZE; ++j) {
638 int idx = i * MAX_SB_SIZE + j;
639 ASSERT_EQ(output16_1[idx], output16_2[idx])
640 << out_w << "x" << out_h << " Pixel mismatch at index "
641 << idx << " = (" << i << ", " << j
642 << "), sub pixel offset = (" << suby << ", " << subx
643 << ")";
644 }
645 }
646 }
647 }
648 }
649
650 // Test different combination of fwd and bck offset weights
651 for (int k = 0; k < 2; ++k) {
652 for (int l = 0; l < 4; ++l) {
653 conv_params1.use_dist_wtd_comp_avg = 1;
654 conv_params2.use_dist_wtd_comp_avg = 1;
655 conv_params1.fwd_offset = quant_dist_lookup_table[k][l][0];
656 conv_params1.bck_offset = quant_dist_lookup_table[k][l][1];
657 conv_params2.fwd_offset = quant_dist_lookup_table[k][l][0];
658 conv_params2.bck_offset = quant_dist_lookup_table[k][l][1];
659
660 const int subx_range = has_subx ? 16 : 1;
661 const int suby_range = has_suby ? 16 : 1;
662 for (subx = 0; subx < subx_range; ++subx) {
663 for (suby = 0; suby < suby_range; ++suby) {
664 // Choose random locations within the source block
665 const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
666 const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
667 av1_highbd_dist_wtd_convolve_2d_c(
668 input + offset_r * w + offset_c, w, output16_1, MAX_SB_SIZE,
669 out_w, out_h, filter_params_x, filter_params_y, subx, suby,
670 &conv_params1, bd);
671 test_impl(input + offset_r * w + offset_c, w, output16_2,
672 MAX_SB_SIZE, out_w, out_h, filter_params_x,
673 filter_params_y, subx, suby, &conv_params2, bd);
674
675 for (int i = 0; i < out_h; ++i) {
676 for (int j = 0; j < out_w; ++j) {
677 int idx = i * MAX_SB_SIZE + j;
678 ASSERT_EQ(output1[idx], output2[idx])
679 << out_w << "x" << out_h << " Pixel mismatch at index "
680 << idx << " = (" << i << ", " << j
681 << "), sub pixel offset = (" << suby << ", " << subx
682 << ")";
683 }
684 }
685
686 if (memcmp(output16_1, output16_2, sizeof(output16_1))) {
687 for (int i = 0; i < MAX_SB_SIZE; ++i) {
688 for (int j = 0; j < MAX_SB_SIZE; ++j) {
689 int idx = i * MAX_SB_SIZE + j;
690 ASSERT_EQ(output16_1[idx], output16_2[idx])
691 << out_w << "x" << out_h
692 << " Pixel mismatch at index " << idx << " = (" << i
693 << ", " << j << "), sub pixel offset = (" << suby
694 << ", " << subx << ")";
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 }
703 }
704 }
705 }
706 } // namespace AV1HighbdConvolve2D
707 #endif // CONFIG_AV1_HIGHBITDEPTH
708 } // namespace libaom_test
709