1 /*
2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13
14 #include <algorithm>
15 #include <complex>
16 #include <ostream>
17 #include <vector>
18
19 #include "aom_dsp/fft_common.h"
20 #include "aom_mem/aom_mem.h"
21 #include "av1/common/common.h"
22 #include "config/aom_dsp_rtcd.h"
23 #include "test/acm_random.h"
24 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
25
26 namespace {
27
28 typedef void (*tform_fun_t)(const float *input, float *temp, float *output);
29
30 // Simple 1D FFT implementation
31 template <typename InputType>
fft(const InputType * data,std::complex<float> * result,int n)32 void fft(const InputType *data, std::complex<float> *result, int n) {
33 if (n == 1) {
34 result[0] = data[0];
35 return;
36 }
37 std::vector<InputType> temp(n);
38 for (int k = 0; k < n / 2; ++k) {
39 temp[k] = data[2 * k];
40 temp[n / 2 + k] = data[2 * k + 1];
41 }
42 fft(&temp[0], result, n / 2);
43 fft(&temp[n / 2], result + n / 2, n / 2);
44 for (int k = 0; k < n / 2; ++k) {
45 std::complex<float> w = std::complex<float>((float)cos(2. * PI * k / n),
46 (float)-sin(2. * PI * k / n));
47 std::complex<float> a = result[k];
48 std::complex<float> b = result[n / 2 + k];
49 result[k] = a + w * b;
50 result[n / 2 + k] = a - w * b;
51 }
52 }
53
transpose(std::vector<std::complex<float>> * data,int n)54 void transpose(std::vector<std::complex<float> > *data, int n) {
55 for (int y = 0; y < n; ++y) {
56 for (int x = y + 1; x < n; ++x) {
57 std::swap((*data)[y * n + x], (*data)[x * n + y]);
58 }
59 }
60 }
61
62 // Simple 2D FFT implementation
63 template <class InputType>
fft2d(const InputType * input,int n)64 std::vector<std::complex<float> > fft2d(const InputType *input, int n) {
65 std::vector<std::complex<float> > rowfft(n * n);
66 std::vector<std::complex<float> > result(n * n);
67 for (int y = 0; y < n; ++y) {
68 fft(input + y * n, &rowfft[y * n], n);
69 }
70 transpose(&rowfft, n);
71 for (int y = 0; y < n; ++y) {
72 fft(&rowfft[y * n], &result[y * n], n);
73 }
74 transpose(&result, n);
75 return result;
76 }
77
78 struct FFTTestArg {
79 int n;
80 void (*fft)(const float *input, float *temp, float *output);
FFTTestArg__anon532d709b0111::FFTTestArg81 FFTTestArg(int n_in, tform_fun_t fft_in) : n(n_in), fft(fft_in) {}
82 };
83
operator <<(std::ostream & os,const FFTTestArg & test_arg)84 std::ostream &operator<<(std::ostream &os, const FFTTestArg &test_arg) {
85 return os << "fft_arg { n:" << test_arg.n << " fft:" << test_arg.fft << " }";
86 }
87
88 class FFT2DTest : public ::testing::TestWithParam<FFTTestArg> {
89 protected:
SetUp()90 void SetUp() {
91 int n = GetParam().n;
92 input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n);
93 temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n);
94 output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n * 2);
95 ASSERT_NE(input_, nullptr);
96 ASSERT_NE(temp_, nullptr);
97 ASSERT_NE(output_, nullptr);
98 memset(input_, 0, sizeof(*input_) * n * n);
99 memset(temp_, 0, sizeof(*temp_) * n * n);
100 memset(output_, 0, sizeof(*output_) * n * n * 2);
101 }
TearDown()102 void TearDown() {
103 aom_free(input_);
104 aom_free(temp_);
105 aom_free(output_);
106 }
107 float *input_;
108 float *temp_;
109 float *output_;
110 };
111
TEST_P(FFT2DTest,Correct)112 TEST_P(FFT2DTest, Correct) {
113 int n = GetParam().n;
114 for (int i = 0; i < n * n; ++i) {
115 input_[i] = 1;
116 std::vector<std::complex<float> > expected = fft2d<float>(&input_[0], n);
117 GetParam().fft(&input_[0], &temp_[0], &output_[0]);
118 for (int y = 0; y < n; ++y) {
119 for (int x = 0; x < (n / 2) + 1; ++x) {
120 EXPECT_NEAR(expected[y * n + x].real(), output_[2 * (y * n + x)], 1e-5);
121 EXPECT_NEAR(expected[y * n + x].imag(), output_[2 * (y * n + x) + 1],
122 1e-5);
123 }
124 }
125 input_[i] = 0;
126 }
127 }
128
TEST_P(FFT2DTest,Benchmark)129 TEST_P(FFT2DTest, Benchmark) {
130 int n = GetParam().n;
131 float sum = 0;
132 const int num_trials = 1000 * (64 - n);
133 for (int i = 0; i < num_trials; ++i) {
134 input_[i % (n * n)] = 1;
135 GetParam().fft(&input_[0], &temp_[0], &output_[0]);
136 sum += output_[0];
137 input_[i % (n * n)] = 0;
138 }
139 EXPECT_NEAR(sum, num_trials, 1e-3);
140 }
141
142 INSTANTIATE_TEST_SUITE_P(C, FFT2DTest,
143 ::testing::Values(FFTTestArg(2, aom_fft2x2_float_c),
144 FFTTestArg(4, aom_fft4x4_float_c),
145 FFTTestArg(8, aom_fft8x8_float_c),
146 FFTTestArg(16, aom_fft16x16_float_c),
147 FFTTestArg(32,
148 aom_fft32x32_float_c)));
149 #if ARCH_X86 || ARCH_X86_64
150 #if HAVE_SSE2
151 INSTANTIATE_TEST_SUITE_P(
152 SSE2, FFT2DTest,
153 ::testing::Values(FFTTestArg(4, aom_fft4x4_float_sse2),
154 FFTTestArg(8, aom_fft8x8_float_sse2),
155 FFTTestArg(16, aom_fft16x16_float_sse2),
156 FFTTestArg(32, aom_fft32x32_float_sse2)));
157 #endif // HAVE_SSE2
158 #if HAVE_AVX2
159 INSTANTIATE_TEST_SUITE_P(
160 AVX2, FFT2DTest,
161 ::testing::Values(FFTTestArg(8, aom_fft8x8_float_avx2),
162 FFTTestArg(16, aom_fft16x16_float_avx2),
163 FFTTestArg(32, aom_fft32x32_float_avx2)));
164 #endif // HAVE_AVX2
165 #endif // ARCH_X86 || ARCH_X86_64
166
167 struct IFFTTestArg {
168 int n;
169 tform_fun_t ifft;
IFFTTestArg__anon532d709b0111::IFFTTestArg170 IFFTTestArg(int n_in, tform_fun_t ifft_in) : n(n_in), ifft(ifft_in) {}
171 };
172
operator <<(std::ostream & os,const IFFTTestArg & test_arg)173 std::ostream &operator<<(std::ostream &os, const IFFTTestArg &test_arg) {
174 return os << "ifft_arg { n:" << test_arg.n << " fft:" << test_arg.ifft
175 << " }";
176 }
177
178 class IFFT2DTest : public ::testing::TestWithParam<IFFTTestArg> {
179 protected:
SetUp()180 void SetUp() {
181 int n = GetParam().n;
182 input_ = (float *)aom_memalign(32, sizeof(*input_) * n * n * 2);
183 temp_ = (float *)aom_memalign(32, sizeof(*temp_) * n * n * 2);
184 output_ = (float *)aom_memalign(32, sizeof(*output_) * n * n);
185 ASSERT_NE(input_, nullptr);
186 ASSERT_NE(temp_, nullptr);
187 ASSERT_NE(output_, nullptr);
188 memset(input_, 0, sizeof(*input_) * n * n * 2);
189 memset(temp_, 0, sizeof(*temp_) * n * n * 2);
190 memset(output_, 0, sizeof(*output_) * n * n);
191 }
TearDown()192 void TearDown() {
193 aom_free(input_);
194 aom_free(temp_);
195 aom_free(output_);
196 }
197 float *input_;
198 float *temp_;
199 float *output_;
200 };
201
TEST_P(IFFT2DTest,Correctness)202 TEST_P(IFFT2DTest, Correctness) {
203 int n = GetParam().n;
204 ASSERT_GE(n, 2);
205 std::vector<float> expected(n * n);
206 std::vector<float> actual(n * n);
207 // Do forward transform then invert to make sure we get back expected
208 for (int y = 0; y < n; ++y) {
209 for (int x = 0; x < n; ++x) {
210 expected[y * n + x] = 1;
211 std::vector<std::complex<float> > input_c = fft2d(&expected[0], n);
212 for (int i = 0; i < n * n; ++i) {
213 input_[2 * i + 0] = input_c[i].real();
214 input_[2 * i + 1] = input_c[i].imag();
215 }
216 GetParam().ifft(&input_[0], &temp_[0], &output_[0]);
217
218 for (int yy = 0; yy < n; ++yy) {
219 for (int xx = 0; xx < n; ++xx) {
220 EXPECT_NEAR(expected[yy * n + xx], output_[yy * n + xx] / (n * n),
221 1e-5);
222 }
223 }
224 expected[y * n + x] = 0;
225 }
226 }
227 }
228
TEST_P(IFFT2DTest,Benchmark)229 TEST_P(IFFT2DTest, Benchmark) {
230 int n = GetParam().n;
231 float sum = 0;
232 const int num_trials = 1000 * (64 - n);
233 for (int i = 0; i < num_trials; ++i) {
234 input_[i % (n * n)] = 1;
235 GetParam().ifft(&input_[0], &temp_[0], &output_[0]);
236 sum += output_[0];
237 input_[i % (n * n)] = 0;
238 }
239 EXPECT_GE(sum, num_trials / 2);
240 }
241 INSTANTIATE_TEST_SUITE_P(
242 C, IFFT2DTest,
243 ::testing::Values(IFFTTestArg(2, aom_ifft2x2_float_c),
244 IFFTTestArg(4, aom_ifft4x4_float_c),
245 IFFTTestArg(8, aom_ifft8x8_float_c),
246 IFFTTestArg(16, aom_ifft16x16_float_c),
247 IFFTTestArg(32, aom_ifft32x32_float_c)));
248 #if ARCH_X86 || ARCH_X86_64
249 #if HAVE_SSE2
250 INSTANTIATE_TEST_SUITE_P(
251 SSE2, IFFT2DTest,
252 ::testing::Values(IFFTTestArg(4, aom_ifft4x4_float_sse2),
253 IFFTTestArg(8, aom_ifft8x8_float_sse2),
254 IFFTTestArg(16, aom_ifft16x16_float_sse2),
255 IFFTTestArg(32, aom_ifft32x32_float_sse2)));
256 #endif // HAVE_SSE2
257
258 #if HAVE_AVX2
259 INSTANTIATE_TEST_SUITE_P(
260 AVX2, IFFT2DTest,
261 ::testing::Values(IFFTTestArg(8, aom_ifft8x8_float_avx2),
262 IFFTTestArg(16, aom_ifft16x16_float_avx2),
263 IFFTTestArg(32, aom_ifft32x32_float_avx2)));
264 #endif // HAVE_AVX2
265 #endif // ARCH_X86 || ARCH_X86_64
266
267 } // namespace
268