1 /*
2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <tuple>
13 #include <vector>
14
15 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
16
17 #include "config/av1_rtcd.h"
18
19 #include "aom_ports/aom_timer.h"
20 #include "test/acm_random.h"
21 #include "test/register_state_check.h"
22 #include "test/util.h"
23
24 #include "av1/common/common_data.h"
25
26 namespace {
// Number of randomised rounds executed by the correctness test.
constexpr int kTestIters = 10;
// Number of calls timed per implementation by the speed test.
constexpr int kPerfIters = 1000;

// Border kept around every test block: kVPad rows above and below it and
// kHPad columns to its left and right.
constexpr int kVPad = 32;
constexpr int kHPad = 32;
// Horizontal and vertical step values handed to the convolve functions.
constexpr int kXStepQn = 16;
constexpr int kYStepQn = 20;
34
35 using libaom_test::ACMRandom;
36 using std::make_tuple;
37 using std::tuple;
38
// Supported filter lengths. The enumerators are consecutive starting at zero,
// which is what lets NTapsToInt() derive the tap count arithmetically.
enum NTaps { EIGHT_TAP, TEN_TAP, TWELVE_TAP };

// Converts an NTaps enumerator into the number of taps it denotes
// (8, 10 or 12).
int NTapsToInt(NTaps ntaps) {
  const int index = static_cast<int>(ntaps);
  return 2 * index + 8;
}
41
42 // A 16-bit filter with a configurable number of taps.
43 class TestFilter {
44 public:
45 void set(NTaps ntaps, bool backwards);
46
47 InterpFilterParams params_;
48
49 private:
50 std::vector<int16_t> coeffs_;
51 };
52
set(NTaps ntaps,bool backwards)53 void TestFilter::set(NTaps ntaps, bool backwards) {
54 const int n = NTapsToInt(ntaps);
55 assert(n >= 8 && n <= 12);
56
57 // The filter has n * SUBPEL_SHIFTS proper elements and an extra 8 bogus
58 // elements at the end so that convolutions can read off the end safely.
59 coeffs_.resize(n * SUBPEL_SHIFTS + 8);
60
61 // The coefficients are pretty much arbitrary, but convolutions shouldn't
62 // over or underflow. For the first filter (subpels = 0), we use an
63 // increasing or decreasing ramp (depending on the backwards parameter). We
64 // don't want any zero coefficients, so we make it have an x-intercept at -1
65 // or n. To ensure absence of under/overflow, we normalise the area under the
66 // ramp to be I = 1 << FILTER_BITS (so that convolving a constant function
67 // gives the identity).
68 //
69 // When increasing, the function has the form:
70 //
71 // f(x) = A * (x + 1)
72 //
73 // Summing and rearranging for A gives A = 2 * I / (n * (n + 1)). If the
74 // filter is reversed, we have the same A but with formula
75 //
76 // g(x) = A * (n - x)
77 const int I = 1 << FILTER_BITS;
78 const float A = 2.f * I / (n * (n + 1.f));
79 for (int i = 0; i < n; ++i) {
80 coeffs_[i] = static_cast<int16_t>(A * (backwards ? (n - i) : (i + 1)));
81 }
82
83 // For the other filters, make them slightly different by swapping two
84 // columns. Filter k will have the columns (k % n) and (7 * k) % n swapped.
85 const size_t filter_size = sizeof(coeffs_[0] * n);
86 int16_t *const filter0 = &coeffs_[0];
87 for (int k = 1; k < SUBPEL_SHIFTS; ++k) {
88 int16_t *filterk = &coeffs_[k * n];
89 memcpy(filterk, filter0, filter_size);
90
91 const int idx0 = k % n;
92 const int idx1 = (7 * k) % n;
93
94 const int16_t tmp = filterk[idx0];
95 filterk[idx0] = filterk[idx1];
96 filterk[idx1] = tmp;
97 }
98
99 // Finally, write some rubbish at the end to make sure we don't use it.
100 for (int i = 0; i < 8; ++i) coeffs_[n * SUBPEL_SHIFTS + i] = 123 + i;
101
102 // Fill in params
103 params_.filter_ptr = &coeffs_[0];
104 params_.taps = n;
105 // These are ignored by the functions being tested. Set them to whatever.
106 params_.interp_filter = EIGHTTAP_REGULAR;
107 }
108
109 template <typename SrcPixel>
110 class TestImage {
111 public:
TestImage(int w,int h,int bd)112 TestImage(int w, int h, int bd) : w_(w), h_(h), bd_(bd) {
113 assert(bd < 16);
114 assert(bd <= 8 * static_cast<int>(sizeof(SrcPixel)));
115
116 // Pad width by 2*kHPad and then round up to the next multiple of 16
117 // to get src_stride_. Add another 16 for dst_stride_ (to make sure
118 // something goes wrong if we use the wrong one)
119 src_stride_ = (w_ + 2 * kHPad + 15) & ~15;
120 dst_stride_ = src_stride_ + 16;
121
122 // Allocate image data
123 src_data_.resize(2 * src_block_size());
124 dst_data_.resize(2 * dst_block_size());
125 dst_16_data_.resize(2 * dst_block_size());
126 }
127
128 void Initialize(ACMRandom *rnd);
129 void Check() const;
130
src_stride() const131 int src_stride() const { return src_stride_; }
dst_stride() const132 int dst_stride() const { return dst_stride_; }
133
src_block_size() const134 int src_block_size() const { return (h_ + 2 * kVPad) * src_stride(); }
dst_block_size() const135 int dst_block_size() const { return (h_ + 2 * kVPad) * dst_stride(); }
136
GetSrcData(bool ref,bool borders) const137 const SrcPixel *GetSrcData(bool ref, bool borders) const {
138 const SrcPixel *block = &src_data_[ref ? 0 : src_block_size()];
139 return borders ? block : block + kHPad + src_stride_ * kVPad;
140 }
141
GetDstData(bool ref,bool borders)142 SrcPixel *GetDstData(bool ref, bool borders) {
143 SrcPixel *block = &dst_data_[ref ? 0 : dst_block_size()];
144 return borders ? block : block + kHPad + dst_stride_ * kVPad;
145 }
146
GetDst16Data(bool ref,bool borders)147 CONV_BUF_TYPE *GetDst16Data(bool ref, bool borders) {
148 CONV_BUF_TYPE *block = &dst_16_data_[ref ? 0 : dst_block_size()];
149 return borders ? block : block + kHPad + dst_stride_ * kVPad;
150 }
151
152 private:
153 int w_, h_, bd_;
154 int src_stride_, dst_stride_;
155
156 std::vector<SrcPixel> src_data_;
157 std::vector<SrcPixel> dst_data_;
158 std::vector<CONV_BUF_TYPE> dst_16_data_;
159 };
160
161 template <typename Pixel>
FillEdge(ACMRandom * rnd,int num_pixels,int bd,bool trash,Pixel * data)162 void FillEdge(ACMRandom *rnd, int num_pixels, int bd, bool trash, Pixel *data) {
163 if (!trash) {
164 memset(data, 0, sizeof(*data) * num_pixels);
165 return;
166 }
167 const Pixel mask = (1 << bd) - 1;
168 for (int i = 0; i < num_pixels; ++i) data[i] = rnd->Rand16() & mask;
169 }
170
171 template <typename Pixel>
PrepBuffers(ACMRandom * rnd,int w,int h,int stride,int bd,bool trash_edges,Pixel * data)172 void PrepBuffers(ACMRandom *rnd, int w, int h, int stride, int bd,
173 bool trash_edges, Pixel *data) {
174 assert(rnd);
175 const Pixel mask = (1 << bd) - 1;
176
177 // Fill in the first buffer with random data
178 // Top border
179 FillEdge(rnd, stride * kVPad, bd, trash_edges, data);
180 for (int r = 0; r < h; ++r) {
181 Pixel *row_data = data + (kVPad + r) * stride;
182 // Left border, contents, right border
183 FillEdge(rnd, kHPad, bd, trash_edges, row_data);
184 for (int c = 0; c < w; ++c) row_data[kHPad + c] = rnd->Rand16() & mask;
185 FillEdge(rnd, kHPad, bd, trash_edges, row_data + kHPad + w);
186 }
187 // Bottom border
188 FillEdge(rnd, stride * kVPad, bd, trash_edges, data + stride * (kVPad + h));
189
190 const int bpp = sizeof(*data);
191 const int block_elts = stride * (h + 2 * kVPad);
192 const int block_size = bpp * block_elts;
193
194 // Now copy that to the second buffer
195 memcpy(data + block_elts, data, block_size);
196 }
197
198 template <typename SrcPixel>
Initialize(ACMRandom * rnd)199 void TestImage<SrcPixel>::Initialize(ACMRandom *rnd) {
200 PrepBuffers(rnd, w_, h_, src_stride_, bd_, false, &src_data_[0]);
201 PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_data_[0]);
202 PrepBuffers(rnd, w_, h_, dst_stride_, bd_, true, &dst_16_data_[0]);
203 }
204
205 template <typename SrcPixel>
Check() const206 void TestImage<SrcPixel>::Check() const {
207 // If memcmp returns 0, there's nothing to do.
208 const int num_pixels = dst_block_size();
209 const SrcPixel *ref_dst = &dst_data_[0];
210 const SrcPixel *tst_dst = &dst_data_[num_pixels];
211
212 const CONV_BUF_TYPE *ref_16_dst = &dst_16_data_[0];
213 const CONV_BUF_TYPE *tst_16_dst = &dst_16_data_[num_pixels];
214
215 if (0 == memcmp(ref_dst, tst_dst, sizeof(*ref_dst) * num_pixels)) {
216 if (0 == memcmp(ref_16_dst, tst_16_dst, sizeof(*ref_16_dst) * num_pixels))
217 return;
218 }
219 // Otherwise, iterate through the buffer looking for differences (including
220 // the edges)
221 const int stride = dst_stride_;
222 for (int r = 0; r < h_ + 2 * kVPad; ++r) {
223 for (int c = 0; c < w_ + 2 * kHPad; ++c) {
224 const int32_t ref_value = ref_dst[r * stride + c];
225 const int32_t tst_value = tst_dst[r * stride + c];
226
227 EXPECT_EQ(tst_value, ref_value)
228 << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
229 }
230 }
231
232 for (int r = 0; r < h_ + 2 * kVPad; ++r) {
233 for (int c = 0; c < w_ + 2 * kHPad; ++c) {
234 const int32_t ref_value = ref_16_dst[r * stride + c];
235 const int32_t tst_value = tst_16_dst[r * stride + c];
236
237 EXPECT_EQ(tst_value, ref_value)
238 << "Error in 16 bit buffer "
239 << "Error at row: " << (r - kVPad) << ", col: " << (c - kHPad);
240 }
241 }
242 }
243
244 typedef tuple<int, int> BlockDimension;
245
246 struct BaseParams {
BaseParams__anon1792a3550111::BaseParams247 BaseParams(BlockDimension dims, NTaps ntaps_x, NTaps ntaps_y, bool avg)
248 : dims(dims), ntaps_x(ntaps_x), ntaps_y(ntaps_y), avg(avg) {}
249
250 BlockDimension dims;
251 NTaps ntaps_x, ntaps_y;
252 bool avg;
253 };
254
255 template <typename SrcPixel>
256 class ConvolveScaleTestBase : public ::testing::Test {
257 public:
ConvolveScaleTestBase()258 ConvolveScaleTestBase() : image_(nullptr) {}
~ConvolveScaleTestBase()259 virtual ~ConvolveScaleTestBase() { delete image_; }
TearDown()260 virtual void TearDown() {}
261
262 // Implemented by subclasses (SetUp depends on the parameters passed
263 // in and RunOne depends on the function to be tested. These can't
264 // be templated for low/high bit depths because they have different
265 // numbers of parameters)
266 virtual void SetUp() = 0;
267 virtual void RunOne(bool ref) = 0;
268
269 protected:
SetParams(const BaseParams & params,int bd)270 void SetParams(const BaseParams ¶ms, int bd) {
271 width_ = std::get<0>(params.dims);
272 height_ = std::get<1>(params.dims);
273 ntaps_x_ = params.ntaps_x;
274 ntaps_y_ = params.ntaps_y;
275 bd_ = bd;
276 avg_ = params.avg;
277
278 filter_x_.set(ntaps_x_, false);
279 filter_y_.set(ntaps_y_, true);
280 convolve_params_ =
281 get_conv_params_no_round(avg_ != false, 0, nullptr, 0, 1, bd);
282
283 delete image_;
284 image_ = new TestImage<SrcPixel>(width_, height_, bd_);
285 ASSERT_NE(image_, nullptr);
286 }
287
SetConvParamOffset(int i,int j,int is_compound,int do_average,int use_dist_wtd_comp_avg)288 void SetConvParamOffset(int i, int j, int is_compound, int do_average,
289 int use_dist_wtd_comp_avg) {
290 if (i == -1 && j == -1) {
291 convolve_params_.use_dist_wtd_comp_avg = use_dist_wtd_comp_avg;
292 convolve_params_.is_compound = is_compound;
293 convolve_params_.do_average = do_average;
294 } else {
295 convolve_params_.use_dist_wtd_comp_avg = use_dist_wtd_comp_avg;
296 convolve_params_.fwd_offset = quant_dist_lookup_table[j][i];
297 convolve_params_.bck_offset = quant_dist_lookup_table[j][1 - i];
298 convolve_params_.is_compound = is_compound;
299 convolve_params_.do_average = do_average;
300 }
301 }
302
Run()303 void Run() {
304 ACMRandom rnd(ACMRandom::DeterministicSeed());
305 for (int i = 0; i < kTestIters; ++i) {
306 int is_compound = 0;
307 SetConvParamOffset(-1, -1, is_compound, 0, 0);
308 Prep(&rnd);
309 RunOne(true);
310 RunOne(false);
311 image_->Check();
312
313 is_compound = 1;
314 for (int do_average = 0; do_average < 2; do_average++) {
315 for (int use_dist_wtd_comp_avg = 0; use_dist_wtd_comp_avg < 2;
316 use_dist_wtd_comp_avg++) {
317 for (int j = 0; j < 2; ++j) {
318 for (int k = 0; k < 4; ++k) {
319 SetConvParamOffset(j, k, is_compound, do_average,
320 use_dist_wtd_comp_avg);
321 Prep(&rnd);
322 RunOne(true);
323 RunOne(false);
324 image_->Check();
325 }
326 }
327 }
328 }
329 }
330 }
331
SpeedTest()332 void SpeedTest() {
333 ACMRandom rnd(ACMRandom::DeterministicSeed());
334 Prep(&rnd);
335
336 aom_usec_timer ref_timer;
337 aom_usec_timer_start(&ref_timer);
338 for (int i = 0; i < kPerfIters; ++i) RunOne(true);
339 aom_usec_timer_mark(&ref_timer);
340 const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);
341
342 aom_usec_timer tst_timer;
343 aom_usec_timer_start(&tst_timer);
344 for (int i = 0; i < kPerfIters; ++i) RunOne(false);
345 aom_usec_timer_mark(&tst_timer);
346 const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);
347
348 std::cout << "[ ] C time = " << ref_time / 1000
349 << " ms, SIMD time = " << tst_time / 1000 << " ms\n";
350
351 EXPECT_GT(ref_time, tst_time)
352 << "Error: CDEFSpeedTest, SIMD slower than C.\n"
353 << "C time: " << ref_time << " us\n"
354 << "SIMD time: " << tst_time << " us\n";
355 }
356
RandomSubpel(ACMRandom * rnd)357 static int RandomSubpel(ACMRandom *rnd) {
358 const uint8_t subpel_mode = rnd->Rand8();
359 if ((subpel_mode & 7) == 0) {
360 return 0;
361 } else if ((subpel_mode & 7) == 1) {
362 return SCALE_SUBPEL_SHIFTS - 1;
363 } else {
364 return 1 + rnd->PseudoUniform(SCALE_SUBPEL_SHIFTS - 2);
365 }
366 }
367
Prep(ACMRandom * rnd)368 void Prep(ACMRandom *rnd) {
369 assert(rnd);
370
371 // Choose subpel_x_ and subpel_y_. They should be less than
372 // SCALE_SUBPEL_SHIFTS; we also want to add extra weight to "interesting"
373 // values: 0 and SCALE_SUBPEL_SHIFTS - 1
374 subpel_x_ = RandomSubpel(rnd);
375 subpel_y_ = RandomSubpel(rnd);
376
377 image_->Initialize(rnd);
378 }
379
380 int width_, height_, bd_;
381 NTaps ntaps_x_, ntaps_y_;
382 bool avg_;
383 int subpel_x_, subpel_y_;
384 TestFilter filter_x_, filter_y_;
385 TestImage<SrcPixel> *image_;
386 ConvolveParams convolve_params_;
387 };
388
389 typedef tuple<int, int> BlockDimension;
390
391 typedef void (*LowbdConvolveFunc)(const uint8_t *src, int src_stride,
392 uint8_t *dst, int dst_stride, int w, int h,
393 const InterpFilterParams *filter_params_x,
394 const InterpFilterParams *filter_params_y,
395 const int subpel_x_qn, const int x_step_qn,
396 const int subpel_y_qn, const int y_step_qn,
397 ConvolveParams *conv_params);
398
399 // Test parameter list:
400 // <tst_fun, dims, ntaps_x, ntaps_y, avg>
401 typedef tuple<LowbdConvolveFunc, BlockDimension, NTaps, NTaps, bool>
402 LowBDParams;
403
404 class LowBDConvolveScaleTest
405 : public ConvolveScaleTestBase<uint8_t>,
406 public ::testing::WithParamInterface<LowBDParams> {
407 public:
~LowBDConvolveScaleTest()408 virtual ~LowBDConvolveScaleTest() {}
409
SetUp()410 void SetUp() {
411 tst_fun_ = GET_PARAM(0);
412
413 const BlockDimension &block = GET_PARAM(1);
414 const NTaps ntaps_x = GET_PARAM(2);
415 const NTaps ntaps_y = GET_PARAM(3);
416 const int bd = 8;
417 const bool avg = GET_PARAM(4);
418
419 SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
420 }
421
RunOne(bool ref)422 void RunOne(bool ref) {
423 const uint8_t *src = image_->GetSrcData(ref, false);
424 uint8_t *dst = image_->GetDstData(ref, false);
425 convolve_params_.dst = image_->GetDst16Data(ref, false);
426 const int src_stride = image_->src_stride();
427 const int dst_stride = image_->dst_stride();
428 if (ref) {
429 av1_convolve_2d_scale_c(src, src_stride, dst, dst_stride, width_, height_,
430 &filter_x_.params_, &filter_y_.params_, subpel_x_,
431 kXStepQn, subpel_y_, kYStepQn, &convolve_params_);
432 } else {
433 tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
434 &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
435 subpel_y_, kYStepQn, &convolve_params_);
436 }
437 }
438
439 private:
440 LowbdConvolveFunc tst_fun_;
441 };
442
443 const BlockDimension kBlockDim[] = {
444 make_tuple(2, 2), make_tuple(2, 4), make_tuple(4, 4),
445 make_tuple(4, 8), make_tuple(8, 4), make_tuple(8, 8),
446 make_tuple(8, 16), make_tuple(16, 8), make_tuple(16, 16),
447 make_tuple(16, 32), make_tuple(32, 16), make_tuple(32, 32),
448 make_tuple(32, 64), make_tuple(64, 32), make_tuple(64, 64),
449 make_tuple(64, 128), make_tuple(128, 64), make_tuple(128, 128),
450 };
451
452 const NTaps kNTaps[] = { EIGHT_TAP };
453
// Compares the tested function against the C reference.
TEST_P(LowBDConvolveScaleTest, Check) {
  Run();
}

// Timing comparison; disabled by default.
TEST_P(LowBDConvolveScaleTest, DISABLED_Speed) {
  SpeedTest();
}

// Every block size x horizontal taps x vertical taps x avg flag for the
// SSE4.1 implementation.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, LowBDConvolveScaleTest,
    ::testing::Combine(::testing::Values(av1_convolve_2d_scale_sse4_1),
                       ::testing::ValuesIn(kBlockDim),
                       ::testing::ValuesIn(kNTaps),
                       ::testing::ValuesIn(kNTaps),
                       ::testing::Bool()));
463
464 #if CONFIG_AV1_HIGHBITDEPTH
465 typedef void (*HighbdConvolveFunc)(const uint16_t *src, int src_stride,
466 uint16_t *dst, int dst_stride, int w, int h,
467 const InterpFilterParams *filter_params_x,
468 const InterpFilterParams *filter_params_y,
469 const int subpel_x_qn, const int x_step_qn,
470 const int subpel_y_qn, const int y_step_qn,
471 ConvolveParams *conv_params, int bd);
472
473 // Test parameter list:
474 // <tst_fun, dims, ntaps_x, ntaps_y, avg, bd>
475 typedef tuple<HighbdConvolveFunc, BlockDimension, NTaps, NTaps, bool, int>
476 HighBDParams;
477
478 class HighBDConvolveScaleTest
479 : public ConvolveScaleTestBase<uint16_t>,
480 public ::testing::WithParamInterface<HighBDParams> {
481 public:
~HighBDConvolveScaleTest()482 virtual ~HighBDConvolveScaleTest() {}
483
SetUp()484 void SetUp() {
485 tst_fun_ = GET_PARAM(0);
486
487 const BlockDimension &block = GET_PARAM(1);
488 const NTaps ntaps_x = GET_PARAM(2);
489 const NTaps ntaps_y = GET_PARAM(3);
490 const bool avg = GET_PARAM(4);
491 const int bd = GET_PARAM(5);
492
493 SetParams(BaseParams(block, ntaps_x, ntaps_y, avg), bd);
494 }
495
RunOne(bool ref)496 void RunOne(bool ref) {
497 const uint16_t *src = image_->GetSrcData(ref, false);
498 uint16_t *dst = image_->GetDstData(ref, false);
499 convolve_params_.dst = image_->GetDst16Data(ref, false);
500 const int src_stride = image_->src_stride();
501 const int dst_stride = image_->dst_stride();
502
503 if (ref) {
504 av1_highbd_convolve_2d_scale_c(
505 src, src_stride, dst, dst_stride, width_, height_, &filter_x_.params_,
506 &filter_y_.params_, subpel_x_, kXStepQn, subpel_y_, kYStepQn,
507 &convolve_params_, bd_);
508 } else {
509 tst_fun_(src, src_stride, dst, dst_stride, width_, height_,
510 &filter_x_.params_, &filter_y_.params_, subpel_x_, kXStepQn,
511 subpel_y_, kYStepQn, &convolve_params_, bd_);
512 }
513 }
514
515 private:
516 HighbdConvolveFunc tst_fun_;
517 };
518
// Bit depths exercised by the high bit-depth tests.
const int kBDs[] = { 8, 10, 12 };

// Compares the tested function against the C reference.
TEST_P(HighBDConvolveScaleTest, Check) {
  Run();
}

// Timing comparison; disabled by default.
TEST_P(HighBDConvolveScaleTest, DISABLED_Speed) {
  SpeedTest();
}

// Every block size x horizontal taps x vertical taps x avg flag x bit depth
// for the SSE4.1 implementation.
INSTANTIATE_TEST_SUITE_P(
    SSE4_1, HighBDConvolveScaleTest,
    ::testing::Combine(::testing::Values(av1_highbd_convolve_2d_scale_sse4_1),
                       ::testing::ValuesIn(kBlockDim),
                       ::testing::ValuesIn(kNTaps),
                       ::testing::ValuesIn(kNTaps),
                       ::testing::Bool(),
                       ::testing::ValuesIn(kBDs)));
530 #endif // CONFIG_AV1_HIGHBITDEPTH
531 } // namespace
532