• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <tuple>
13 
14 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
15 
16 #include "config/av1_rtcd.h"
17 
18 #include "test/acm_random.h"
19 #include "test/av1_txfm_test.h"
20 #include "test/register_state_check.h"
21 #include "test/util.h"
22 #include "av1/common/enums.h"
23 #include "av1/common/scan.h"
24 #include "aom_dsp/aom_dsp_common.h"
25 #include "aom_ports/mem.h"
26 
27 namespace {
28 
29 using libaom_test::ACMRandom;
30 using std::tuple;
31 
32 typedef void (*HbdHtFunc)(const int16_t *input, int32_t *output, int stride,
33                           TX_TYPE tx_type, int bd);
34 
35 typedef void (*IHbdHtFunc)(const int32_t *coeff, uint16_t *output, int stride,
36                            TX_TYPE tx_type, int bd);
// Human-readable transform type names, looked up by TX_TYPE value when a
// mismatch is reported. The entry order must follow the TX_TYPE enumeration.
static const char *tx_type_name[] = {
  // 2D combinations of DCT / ADST / FLIPADST.
  "DCT_DCT", "ADST_DCT", "DCT_ADST", "ADST_ADST",
  "FLIPADST_DCT", "DCT_FLIPADST", "FLIPADST_FLIPADST",
  "ADST_FLIPADST", "FLIPADST_ADST",
  // Identity transform.
  "IDTX",
  // One-directional (vertical / horizontal) transforms.
  "V_DCT", "H_DCT",
  "V_ADST", "H_ADST",
  "V_FLIPADST", "H_FLIPADST",
};
55 // Test parameter argument list:
56 //   <transform reference function,
57 //    optimized inverse transform function,
58 //    inverse transform reference function,
59 //    num_coeffs,
60 //    tx_type,
61 //    bit_depth>
62 typedef tuple<HbdHtFunc, IHbdHtFunc, IHbdHtFunc, int, TX_TYPE, int> IHbdHtParam;
63 
64 class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
65  public:
~AV1HighbdInvHTNxN()66   virtual ~AV1HighbdInvHTNxN() {}
67 
SetUp()68   virtual void SetUp() {
69     txfm_ref_ = GET_PARAM(0);
70     inv_txfm_ = GET_PARAM(1);
71     inv_txfm_ref_ = GET_PARAM(2);
72     num_coeffs_ = GET_PARAM(3);
73     tx_type_ = GET_PARAM(4);
74     bit_depth_ = GET_PARAM(5);
75 
76     input_ = reinterpret_cast<int16_t *>(
77         aom_memalign(16, sizeof(input_[0]) * num_coeffs_));
78 
79     // Note:
80     // Inverse transform input buffer is 32-byte aligned
81     // Refer to <root>/av1/encoder/context_tree.c, function,
82     // void alloc_mode_context().
83     coeffs_ = reinterpret_cast<int32_t *>(
84         aom_memalign(32, sizeof(coeffs_[0]) * num_coeffs_));
85     output_ = reinterpret_cast<uint16_t *>(
86         aom_memalign(32, sizeof(output_[0]) * num_coeffs_));
87     output_ref_ = reinterpret_cast<uint16_t *>(
88         aom_memalign(32, sizeof(output_ref_[0]) * num_coeffs_));
89   }
90 
TearDown()91   virtual void TearDown() {
92     aom_free(input_);
93     aom_free(coeffs_);
94     aom_free(output_);
95     aom_free(output_ref_);
96   }
97 
98  protected:
99   void RunBitexactCheck();
100 
101  private:
GetStride() const102   int GetStride() const {
103     if (16 == num_coeffs_) {
104       return 4;
105     } else if (64 == num_coeffs_) {
106       return 8;
107     } else if (256 == num_coeffs_) {
108       return 16;
109     } else if (1024 == num_coeffs_) {
110       return 32;
111     } else if (4096 == num_coeffs_) {
112       return 64;
113     } else {
114       return 0;
115     }
116   }
117 
118   HbdHtFunc txfm_ref_;
119   IHbdHtFunc inv_txfm_;
120   IHbdHtFunc inv_txfm_ref_;
121   int num_coeffs_;
122   TX_TYPE tx_type_;
123   int bit_depth_;
124 
125   int16_t *input_;
126   int32_t *coeffs_;
127   uint16_t *output_;
128   uint16_t *output_ref_;
129 };
130 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvHTNxN);
131 
RunBitexactCheck()132 void AV1HighbdInvHTNxN::RunBitexactCheck() {
133   ACMRandom rnd(ACMRandom::DeterministicSeed());
134   const int stride = GetStride();
135   const int num_tests = 20000;
136   const uint16_t mask = (1 << bit_depth_) - 1;
137 
138   for (int i = 0; i < num_tests; ++i) {
139     for (int j = 0; j < num_coeffs_; ++j) {
140       input_[j] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
141       output_ref_[j] = rnd.Rand16() & mask;
142       output_[j] = output_ref_[j];
143     }
144 
145     txfm_ref_(input_, coeffs_, stride, tx_type_, bit_depth_);
146     inv_txfm_ref_(coeffs_, output_ref_, stride, tx_type_, bit_depth_);
147     API_REGISTER_STATE_CHECK(
148         inv_txfm_(coeffs_, output_, stride, tx_type_, bit_depth_));
149 
150     for (int j = 0; j < num_coeffs_; ++j) {
151       EXPECT_EQ(output_ref_[j], output_[j])
152           << "Not bit-exact result at index: " << j << " At test block: " << i;
153     }
154   }
155 }
156 
// Bit-exactness of the optimized inverse transform against the reference.
TEST_P(AV1HighbdInvHTNxN, InvTransResultCheck) {
  RunBitexactCheck();
}
158 
159 using std::make_tuple;
160 
#if HAVE_SSE4_1
// First four IHbdHtParam fields shared by every 4x4 case: forward reference,
// SSE4.1 inverse under test, C inverse reference, and the coefficient count.
#define PARAM_LIST_4X4                                   \
  &av1_fwd_txfm2d_4x4_c, &av1_inv_txfm2d_add_4x4_sse4_1, \
      &av1_inv_txfm2d_add_4x4_c, 16

// Expands to the 10-bit case followed by the 12-bit case for one tx type.
#define IHT_4X4_BOTH_DEPTHS(type)       \
  make_tuple(PARAM_LIST_4X4, type, 10), \
      make_tuple(PARAM_LIST_4X4, type, 12)

const IHbdHtParam kArrayIhtParam[] = {
  // 4x4
  IHT_4X4_BOTH_DEPTHS(DCT_DCT),
  IHT_4X4_BOTH_DEPTHS(ADST_DCT),
  IHT_4X4_BOTH_DEPTHS(DCT_ADST),
  IHT_4X4_BOTH_DEPTHS(ADST_ADST),
  IHT_4X4_BOTH_DEPTHS(FLIPADST_DCT),
  IHT_4X4_BOTH_DEPTHS(DCT_FLIPADST),
  IHT_4X4_BOTH_DEPTHS(FLIPADST_FLIPADST),
  IHT_4X4_BOTH_DEPTHS(ADST_FLIPADST),
  IHT_4X4_BOTH_DEPTHS(FLIPADST_ADST),
};

#undef IHT_4X4_BOTH_DEPTHS

INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvHTNxN,
                         ::testing::ValuesIn(kArrayIhtParam));
#endif  // HAVE_SSE4_1
191 
192 typedef void (*HighbdInvTxfm2dFunc)(const int32_t *input, uint8_t *output,
193                                     int stride, const TxfmParam *txfm_param);
194 
195 typedef std::tuple<const HighbdInvTxfm2dFunc> AV1HighbdInvTxfm2dParam;
196 class AV1HighbdInvTxfm2d
197     : public ::testing::TestWithParam<AV1HighbdInvTxfm2dParam> {
198  public:
SetUp()199   virtual void SetUp() { target_func_ = GET_PARAM(0); }
200   void RunAV1InvTxfm2dTest(TX_TYPE tx_type, TX_SIZE tx_size, int run_times,
201                            int bit_depth, int gt_int16 = 0);
202 
203  private:
204   HighbdInvTxfm2dFunc target_func_;
205 };
206 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(AV1HighbdInvTxfm2d);
207 
RunAV1InvTxfm2dTest(TX_TYPE tx_type_,TX_SIZE tx_size_,int run_times,int bit_depth_,int gt_int16)208 void AV1HighbdInvTxfm2d::RunAV1InvTxfm2dTest(TX_TYPE tx_type_, TX_SIZE tx_size_,
209                                              int run_times, int bit_depth_,
210                                              int gt_int16) {
211 #if CONFIG_REALTIME_ONLY
212   if (tx_size_ >= TX_4X16) {
213     return;
214   }
215 #endif
216   FwdTxfm2dFunc fwd_func_ = libaom_test::fwd_txfm_func_ls[tx_size_];
217   TxfmParam txfm_param;
218   const int BLK_WIDTH = 64;
219   const int BLK_SIZE = BLK_WIDTH * BLK_WIDTH;
220   DECLARE_ALIGNED(16, int16_t, input[BLK_SIZE]) = { 0 };
221   DECLARE_ALIGNED(32, int32_t, inv_input[BLK_SIZE]) = { 0 };
222   DECLARE_ALIGNED(32, uint16_t, output[BLK_SIZE]) = { 0 };
223   DECLARE_ALIGNED(32, uint16_t, ref_output[BLK_SIZE]) = { 0 };
224   int stride = BLK_WIDTH;
225   int rows = tx_size_high[tx_size_];
226   int cols = tx_size_wide[tx_size_];
227   const int rows_nonezero = AOMMIN(32, rows);
228   const int cols_nonezero = AOMMIN(32, cols);
229   const uint16_t mask = (1 << bit_depth_) - 1;
230   run_times /= (rows * cols);
231   run_times = AOMMAX(1, run_times);
232   const SCAN_ORDER *scan_order = get_default_scan(tx_size_, tx_type_);
233   const int16_t *scan = scan_order->scan;
234   const int16_t eobmax = rows_nonezero * cols_nonezero;
235   ACMRandom rnd(ACMRandom::DeterministicSeed());
236   int randTimes = run_times == 1 ? (eobmax) : 1;
237 
238   txfm_param.tx_type = tx_type_;
239   txfm_param.tx_size = tx_size_;
240   txfm_param.lossless = 0;
241   txfm_param.bd = bit_depth_;
242   txfm_param.is_hbd = 1;
243   txfm_param.tx_set_type = EXT_TX_SET_ALL16;
244 
245   for (int cnt = 0; cnt < randTimes; ++cnt) {
246     for (int r = 0; r < BLK_WIDTH; ++r) {
247       for (int c = 0; c < BLK_WIDTH; ++c) {
248         input[r * cols + c] = (rnd.Rand16() & mask) - (rnd.Rand16() & mask);
249         output[r * stride + c] = rnd.Rand16() & mask;
250 
251         ref_output[r * stride + c] = output[r * stride + c];
252       }
253     }
254     fwd_func_(input, inv_input, stride, tx_type_, bit_depth_);
255 
256     // produce eob input by setting high freq coeffs to zero
257     const int eob = AOMMIN(cnt + 1, eobmax);
258     for (int i = eob; i < eobmax; i++) {
259       inv_input[scan[i]] = 0;
260     }
261     txfm_param.eob = eob;
262     if (gt_int16) {
263       const uint16_t inv_input_mask =
264           static_cast<uint16_t>((1 << (bit_depth_ + 7)) - 1);
265       for (int i = 0; i < eob; i++) {
266         inv_input[scan[i]] = (rnd.Rand31() & inv_input_mask);
267       }
268     }
269 
270     aom_usec_timer ref_timer, test_timer;
271     aom_usec_timer_start(&ref_timer);
272     for (int i = 0; i < run_times; ++i) {
273       av1_highbd_inv_txfm_add_c(inv_input, CONVERT_TO_BYTEPTR(ref_output),
274                                 stride, &txfm_param);
275     }
276     aom_usec_timer_mark(&ref_timer);
277     const int elapsed_time_c =
278         static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
279 
280     aom_usec_timer_start(&test_timer);
281     for (int i = 0; i < run_times; ++i) {
282       target_func_(inv_input, CONVERT_TO_BYTEPTR(output), stride, &txfm_param);
283     }
284     aom_usec_timer_mark(&test_timer);
285     const int elapsed_time_simd =
286         static_cast<int>(aom_usec_timer_elapsed(&test_timer));
287     if (run_times > 10) {
288       printf(
289           "txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t "
290           "gain=%d \n",
291           tx_size_, tx_type_, elapsed_time_c, elapsed_time_simd,
292           (elapsed_time_c / elapsed_time_simd));
293     } else {
294       for (int r = 0; r < rows; ++r) {
295         for (int c = 0; c < cols; ++c) {
296           ASSERT_EQ(ref_output[r * stride + c], output[r * stride + c])
297               << "[" << r << "," << c << "] " << cnt
298               << " tx_size: " << static_cast<int>(tx_size_)
299               << " bit_depth_: " << bit_depth_
300               << " tx_type: " << tx_type_name[tx_type_] << " eob " << eob;
301         }
302       }
303     }
304   }
305 }
306 
// Output comparison for every valid (tx_size, tx_type) pair at 8, 10 and 12
// bits, one run per configuration.
TEST_P(AV1HighbdInvTxfm2d, match) {
  const int kBitDepths[] = { 8, 10, 12 };
  for (const int bd : kBitDepths) {
    for (int size_idx = 0; size_idx < static_cast<int>(TX_SIZES_ALL);
         ++size_idx) {
      const TX_SIZE tx_size = static_cast<TX_SIZE>(size_idx);
      for (int type_idx = 0; type_idx < static_cast<int>(TX_TYPES);
           ++type_idx) {
        const TX_TYPE tx_type = static_cast<TX_TYPE>(type_idx);
        if (!libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) continue;
        RunAV1InvTxfm2dTest(tx_type, tx_size, 1, bd);
      }
    }
  }
}
322 
// Same comparison as the match test, restricted to a subset of transform
// types and run with gt_int16 enabled.
TEST_P(AV1HighbdInvTxfm2d, gt_int16) {
  const int kBitDepths[] = { 8, 10, 12 };
  static const TX_TYPE kTypes[] = {
    DCT_DCT, ADST_DCT, FLIPADST_DCT, IDTX, V_DCT, H_DCT, H_ADST, H_FLIPADST
  };
  for (const int bd : kBitDepths) {
    for (int size_idx = 0; size_idx < static_cast<int>(TX_SIZES_ALL);
         ++size_idx) {
      const TX_SIZE tx_size = static_cast<TX_SIZE>(size_idx);
      for (const TX_TYPE tx_type : kTypes) {
        if (!libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) continue;
        RunAV1InvTxfm2dTest(tx_type, tx_size, 1, bd, 1);
      }
    }
  }
}
341 
// Timing run (disabled by default) over every valid (tx_size, tx_type) pair
// at 10 and 12 bits, with a budget of 1000000 iterations per configuration.
TEST_P(AV1HighbdInvTxfm2d, DISABLED_Speed) {
  const int kBitDepths[] = { 10, 12 };
  for (const int bd : kBitDepths) {
    for (int size_idx = 0; size_idx < static_cast<int>(TX_SIZES_ALL);
         ++size_idx) {
      const TX_SIZE tx_size = static_cast<TX_SIZE>(size_idx);
      for (int type_idx = 0; type_idx < static_cast<int>(TX_TYPES);
           ++type_idx) {
        const TX_TYPE tx_type = static_cast<TX_TYPE>(type_idx);
        if (!libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) continue;
        RunAV1InvTxfm2dTest(tx_type, tx_size, 1000000, bd);
      }
    }
  }
}
357 
// One instantiation of AV1HighbdInvTxfm2d per optimized implementation that
// is enabled in this build.
#if HAVE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE4_1, AV1HighbdInvTxfm2d,
                         ::testing::Values(&av1_highbd_inv_txfm_add_sse4_1));
#endif  // HAVE_SSE4_1

#if HAVE_AVX2
INSTANTIATE_TEST_SUITE_P(AVX2, AV1HighbdInvTxfm2d,
                         ::testing::Values(&av1_highbd_inv_txfm_add_avx2));
#endif  // HAVE_AVX2

#if HAVE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, AV1HighbdInvTxfm2d,
                         ::testing::Values(&av1_highbd_inv_txfm_add_neon));
#endif  // HAVE_NEON
372 
373 }  // namespace
374