• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2019-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "DFT.h"
25 
26 #include "PadLayer.h"
27 #include "Permute.h"
28 #include "Reverse.h"
29 #include "SliceOperations.h"
30 
31 #include <cmath>
32 
33 namespace arm_compute
34 {
35 namespace test
36 {
37 namespace validation
38 {
39 namespace reference
40 {
41 namespace
42 {
43 /** Performs an one dimensional DFT on a given real sequence.
44  *
45  * @param[in]  src_ptr Pointer to the real input sequence.
46  * @param[in]  N       Size of input sequence.
47  * @param[out] dst_ptr Pointer to the complex output sequence.
48  * @param[out] K       Size of the output sequence
49  */
50 template <typename T>
rdft_1d_step(const T * src_ptr,size_t N,T * dst_ptr,size_t K)51 void rdft_1d_step(const T *src_ptr, size_t N, T *dst_ptr, size_t K)
52 {
53 #if defined(_OPENMP)
54     #pragma omp parallel for
55 #endif /* _OPENMP */
56     for(unsigned int k = 0; k < K; ++k)
57     {
58         float Xr = 0;
59         float Xi = 0;
60         for(unsigned int n = 0; n < N; ++n)
61         {
62             const float alpha = (2 * M_PI * k * n) / N;
63             const float val_r = src_ptr[n];
64             // Assuming DFT from the R domain thus skipping imaginary calculations
65             Xr += val_r * cos(alpha);
66             Xi -= val_r * sin(alpha);
67         }
68 
69         dst_ptr[k * 2]     = Xr;
70         dst_ptr[k * 2 + 1] = Xi;
71     }
72 }
73 
74 /** Performs an one dimensional DFT on a given complex sequence.
75  *
76  * @param[in]  src_ptr Pointer to the complex input sequence.
77  * @param[out] dst_ptr Pointer to the complex output sequence.
78  * @param[in]  N       Size of the sequences
79  */
80 template <typename T>
dft_1d_step(const T * src_ptr,T * dst_ptr,size_t N)81 void dft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
82 {
83 #if defined(_OPENMP)
84     #pragma omp parallel for
85 #endif /* _OPENMP */
86     for(unsigned int k = 0; k < N; ++k)
87     {
88         float Xr = 0;
89         float Xi = 0;
90         for(unsigned int n = 0; n < N; ++n)
91         {
92             const float alpha     = (2 * M_PI * k * n) / N;
93             const float val_r     = src_ptr[2 * n];
94             const float val_i     = src_ptr[2 * n + 1];
95             const float cos_alpha = cos(alpha);
96             const float sin_alpha = sin(alpha);
97 
98             Xr += val_r * cos_alpha + val_i * sin_alpha;
99             Xi += val_i * cos_alpha - val_r * sin_alpha;
100         }
101 
102         dst_ptr[k * 2]     = Xr;
103         dst_ptr[k * 2 + 1] = Xi;
104     }
105 }
106 
107 /** Performs an one dimensional inverse DFT on a given real sequence.
108  *
109  * @param[in]  src_ptr Pointer to the real input sequence.
110  * @param[in]  K       Size of input sequence.
111  * @param[out] dst_ptr Pointer to the complex output sequence.
112  * @param[out] N       Size of the output sequence
113  */
114 template <typename T>
irdft_1d_step(const T * src_ptr,size_t K,T * dst_ptr,size_t N)115 void irdft_1d_step(const T *src_ptr, size_t K, T *dst_ptr, size_t N)
116 {
117     const bool         is_odd     = N % 2;
118     const unsigned int Nleft      = N - K;
119     const int          tail_start = is_odd ? K - 1 : K - 2;
120 #if defined(_OPENMP)
121     #pragma omp parallel for
122 #endif /* _OPENMP */
123     for(unsigned int n = 0; n < N; ++n)
124     {
125         float xr = 0;
126         for(unsigned int k = 0; k < K; ++k)
127         {
128             const float alpha = (2 * M_PI * k * n) / N;
129             xr += src_ptr[2 * k] * cos(alpha) - src_ptr[2 * k + 1] * sin(alpha);
130         }
131 
132         unsigned int j = tail_start;
133         for(unsigned int k = 0; k < Nleft; ++k)
134         {
135             const float alpha = (2 * M_PI * (k + K) * n) / N;
136             xr += src_ptr[2 * j] * cos(alpha) + src_ptr[2 * j + 1] * sin(alpha);
137             --j;
138         }
139 
140         dst_ptr[n] = xr;
141     }
142 }
143 
144 /** Performs an one dimensional inverse DFT on a given complex sequence.
145  *
146  * @param[in]  src_ptr Pointer to the complex input sequence.
147  * @param[out] dst_ptr Pointer to the complex output sequence.
148  * @param[in]  N       Size of the sequences
149  */
150 template <typename T>
idft_1d_step(const T * src_ptr,T * dst_ptr,size_t N)151 void idft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
152 {
153 #if defined(_OPENMP)
154     #pragma omp parallel for
155 #endif /* _OPENMP */
156     for(unsigned int n = 0; n < N; ++n)
157     {
158         float xr = 0;
159         float xi = 0;
160         for(unsigned int k = 0; k < N; ++k)
161         {
162             const float alpha     = (2 * M_PI * k * n) / N;
163             const float cos_alpha = cos(alpha);
164             const float sin_alpha = sin(alpha);
165             const float val_r     = src_ptr[2 * k];
166             const float val_i     = src_ptr[2 * k + 1];
167 
168             xr += val_r * cos_alpha - val_i * sin_alpha;
169             xi += val_i * cos_alpha + val_r * sin_alpha;
170         }
171 
172         dst_ptr[2 * n]     = xr;
173         dst_ptr[2 * n + 1] = xi;
174     }
175 }
176 
177 template <typename T>
rdft_1d_core(const SimpleTensor<T> & src,FFTDirection direction,bool is_odd)178 SimpleTensor<T> rdft_1d_core(const SimpleTensor<T> &src, FFTDirection direction, bool is_odd)
179 {
180     // Performs only rdft
181     ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Forward && src.num_channels() != 1);
182     ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Inverse && src.num_channels() != 2);
183 
184     const unsigned int inverse_tail = is_odd ? 1 : 0;
185     const unsigned int N            = src.shape()[0];
186     const unsigned int K            = direction == FFTDirection::Forward ? N / 2 + 1 : (N - 1) * 2 + inverse_tail;
187     const unsigned int num_channels = direction == FFTDirection::Forward ? 2 : 1;
188 
189     TensorShape dst_shape = src.shape();
190     dst_shape.set(0, K);
191 
192     SimpleTensor<T> dst(dst_shape, src.data_type(), num_channels);
193 
194     const unsigned int upper_dims = src.shape().total_size_upper(1);
195 #if defined(_OPENMP)
196     #pragma omp parallel for
197 #endif /* _OPENMP */
198     for(unsigned int du = 0; du < upper_dims; ++du)
199     {
200         const T *src_row_ptr = src.data() + du * N * src.num_channels();
201         T       *dst_row_ptr = dst.data() + du * K * dst.num_channels();
202         direction == FFTDirection::Forward ? rdft_1d_step(src_row_ptr, N, dst_row_ptr, K) : irdft_1d_step(src_row_ptr, N, dst_row_ptr, K);
203     }
204 
205     return dst;
206 }
207 
208 template <typename T>
dft_1d_core(const SimpleTensor<T> & src,FFTDirection direction)209 SimpleTensor<T> dft_1d_core(const SimpleTensor<T> &src, FFTDirection direction)
210 {
211     ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
212 
213     const unsigned int N = src.shape()[0];
214 
215     SimpleTensor<T> dst(src.shape(), src.data_type(), src.num_channels());
216 
217     const unsigned int upper_dims = src.shape().total_size_upper(1);
218 #if defined(_OPENMP)
219     #pragma omp parallel for
220 #endif /* _OPENMP */
221     for(unsigned int du = 0; du < upper_dims; ++du)
222     {
223         const T *src_row_ptr = src.data() + du * N * src.num_channels();
224         T       *dst_row_ptr = dst.data() + du * N * dst.num_channels();
225         direction == FFTDirection::Forward ? dft_1d_step(src_row_ptr, dst_row_ptr, N) : idft_1d_step(src_row_ptr, dst_row_ptr, N);
226     }
227 
228     return dst;
229 }
230 
231 /** Scale a tensor by a given scaling factor.
232  *
233  * @param[in,out] tensor         Tensor to scale.
234  * @param[in]     scaling_factor Scaling to scale the tensor data with.
235  */
236 template <typename T>
scale(SimpleTensor<T> & tensor,T scaling_factor)237 void scale(SimpleTensor<T> &tensor, T scaling_factor)
238 {
239     const int total_elements = tensor.num_elements() * tensor.num_channels();
240     T        *data_ptr       = tensor.data();
241 #if defined(_OPENMP)
242     #pragma omp parallel for
243 #endif /* _OPENMP */
244     for(int i = 0; i < total_elements; ++i)
245     {
246         data_ptr[i] /= scaling_factor;
247     }
248 }
249 
250 /** Performs a complex element-wise multiplication with reduction across the channels axis.
251  *
252  * @param[in] input   Input tensor.
253  * @param[in] weights Weights tensor.
254  *
255  * @return Output tensor.
256  */
257 template <typename T>
complex_mul_and_reduce(const SimpleTensor<T> & input,const SimpleTensor<T> & weights)258 SimpleTensor<T> complex_mul_and_reduce(const SimpleTensor<T> &input, const SimpleTensor<T> &weights)
259 {
260     const uint32_t W  = input.shape().x();
261     const uint32_t H  = input.shape().y();
262     const uint32_t Ci = input.shape().z();
263     const uint32_t Co = weights.shape()[3];
264     const uint32_t N  = input.shape().total_size() / (W * H * Ci);
265 
266     TensorShape output_shape = input.shape();
267     output_shape.set(2, Co);
268     SimpleTensor<T> dst(output_shape, input.data_type(), input.num_channels());
269 
270     // MemSet dst memory to zero
271     std::memset(dst.data(), 0, dst.size());
272 
273     for(uint32_t b = 0; b < N; ++b)
274     {
275         for(uint32_t co = 0; co < Co; ++co)
276         {
277             for(uint32_t ci = 0; ci < Ci; ++ci)
278             {
279                 for(uint32_t h = 0; h < H; ++h)
280                 {
281                     for(uint32_t w = 0; w < W; ++w)
282                     {
283                         const uint32_t    i_index  = w + h * W + ci * H * W + b * H * W * Ci;
284                         const uint32_t    w_index  = w + h * W + ci * H * W + co * H * W * Ci;
285                         const uint32_t    o_index  = w + h * W + co * H * W + b * H * W * Co;
286                         const Coordinates i_coords = index2coords(input.shape(), i_index);
287                         const Coordinates w_coords = index2coords(weights.shape(), w_index);
288                         const Coordinates o_coords = index2coords(dst.shape(), o_index);
289 
290                         auto i_ptr = static_cast<const T *>(input(i_coords));
291                         auto w_ptr = static_cast<const T *>(weights(w_coords));
292                         auto o_ptr = static_cast<T *>(dst(o_coords));
293 
294                         const T Rin = i_ptr[0];
295                         const T Iin = i_ptr[1];
296                         const T Rw  = w_ptr[0];
297                         const T Iw  = w_ptr[1];
298 
299                         o_ptr[0] += Rin * Rw - Iin * Iw;
300                         o_ptr[1] += Rin * Iw + Rw * Iin;
301                     }
302                 }
303             }
304         }
305     }
306     return dst;
307 }
308 } // namespace
309 
310 template <typename T>
rdft_1d(const SimpleTensor<T> & src)311 SimpleTensor<T> rdft_1d(const SimpleTensor<T> &src)
312 {
313     return rdft_1d_core(src, FFTDirection::Forward, false);
314 }
315 
316 template <typename T>
ridft_1d(const SimpleTensor<T> & src,bool is_odd)317 SimpleTensor<T> ridft_1d(const SimpleTensor<T> &src, bool is_odd)
318 {
319     auto dst = rdft_1d_core(src, FFTDirection::Inverse, is_odd);
320 
321     const T scaling_factor = dst.shape()[0];
322     scale(dst, scaling_factor);
323 
324     return dst;
325 }
326 
327 template <typename T>
dft_1d(const SimpleTensor<T> & src,FFTDirection direction)328 SimpleTensor<T> dft_1d(const SimpleTensor<T> &src, FFTDirection direction)
329 {
330     auto dst = dft_1d_core(src, direction);
331     if(direction == FFTDirection::Inverse)
332     {
333         const T scaling_factor = dst.shape()[0];
334         scale(dst, scaling_factor);
335     }
336     return dst;
337 }
338 
339 template <typename T>
rdft_2d(const SimpleTensor<T> & src)340 SimpleTensor<T> rdft_2d(const SimpleTensor<T> &src)
341 {
342     ARM_COMPUTE_ERROR_ON(src.num_channels() != 1);
343     constexpr FFTDirection direction = FFTDirection::Forward;
344 
345     auto first_pass  = rdft_1d_core(src, direction, false);
346     auto transposed  = permute(first_pass, PermutationVector(1U, 0U));
347     auto second_pass = dft_1d_core(transposed, direction);
348     return permute(second_pass, PermutationVector(1U, 0U));
349 }
350 
351 template <typename T>
ridft_2d(const SimpleTensor<T> & src,bool is_odd)352 SimpleTensor<T> ridft_2d(const SimpleTensor<T> &src, bool is_odd)
353 {
354     ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
355     constexpr FFTDirection direction = FFTDirection::Inverse;
356 
357     auto transposed   = permute(src, PermutationVector(1U, 0U));
358     auto first_pass   = dft_1d_core(transposed, direction);
359     auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
360     auto dst          = rdft_1d_core(transposed_2, direction, is_odd);
361 
362     const T scaling_factor = dst.shape()[0] * dst.shape()[1];
363     scale(dst, scaling_factor);
364     return dst;
365 }
366 
367 template <typename T>
dft_2d(const SimpleTensor<T> & src,FFTDirection direction)368 SimpleTensor<T> dft_2d(const SimpleTensor<T> &src, FFTDirection direction)
369 {
370     ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
371 
372     if(direction == FFTDirection::Forward)
373     {
374         auto first_pass  = dft_1d_core(src, direction);
375         auto transposed  = permute(first_pass, PermutationVector(1U, 0U));
376         auto second_pass = dft_1d_core(transposed, direction);
377         return permute(second_pass, PermutationVector(1U, 0U));
378     }
379     else
380     {
381         auto transposed   = permute(src, PermutationVector(1U, 0U));
382         auto first_pass   = dft_1d_core(transposed, direction);
383         auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
384         auto dst          = dft_1d_core(transposed_2, direction);
385 
386         const T scaling_factor = dst.shape()[0] * dst.shape()[1];
387         scale(dst, scaling_factor);
388 
389         return dst;
390     }
391 }
392 
393 template <typename T>
conv2d_dft(const SimpleTensor<T> & src,const SimpleTensor<T> & w,const PadStrideInfo & conv_info)394 SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, const PadStrideInfo &conv_info)
395 {
396     // Pad input to full padding
397     const PaddingList padding_in = { { 0, w.shape()[0] - 1 }, { 0, w.shape()[1] - 1 } };
398     auto              padded_src = pad_layer(src, padding_in);
399 
400     // Flip weights
401     std::vector<uint32_t>  axis_v = { 0, 1 };
402     SimpleTensor<uint32_t> axis{ TensorShape(2U), DataType::U32 };
403     std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data());
404     auto flipped_w = reverse(w, axis);
405 
406     // Pad weights to have the same size as input
407     const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } };
408     auto              padded_w   = pad_layer(flipped_w, paddings_w);
409 
410     // Transform input and weights to frequency domain
411     auto Fsrc = rdft_2d(padded_src);
412     auto Fw   = rdft_2d(padded_w);
413 
414     // Perform dot product
415     auto Fdst = complex_mul_and_reduce(Fsrc, Fw);
416 
417     // Transform output back to frequency domain
418     auto conv_res = ridft_2d(Fdst);
419 
420     // Slice output
421     const int start_left = w.shape().x() - conv_info.pad_left() - 1;
422     const int start_top  = w.shape().y() - conv_info.pad_top() - 1;
423     const int end_right  = conv_res.shape().x() - (w.shape().x() - conv_info.pad_right() - 1);
424     const int end_botton = conv_res.shape().y() - (w.shape().y() - conv_info.pad_bottom() - 1);
425     return slice(conv_res, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
426 }
427 
428 template SimpleTensor<float> rdft_1d(const SimpleTensor<float> &src);
429 template SimpleTensor<float> ridft_1d(const SimpleTensor<float> &src, bool is_odd);
430 template SimpleTensor<float> dft_1d(const SimpleTensor<float> &src, FFTDirection direction);
431 
432 template SimpleTensor<float> rdft_2d(const SimpleTensor<float> &src);
433 template SimpleTensor<float> ridft_2d(const SimpleTensor<float> &src, bool is_odd);
434 template SimpleTensor<float> dft_2d(const SimpleTensor<float> &src, FFTDirection direction);
435 
436 template SimpleTensor<float> conv2d_dft(const SimpleTensor<float> &src, const SimpleTensor<float> &w, const PadStrideInfo &conv_info);
437 } // namespace reference
438 } // namespace validation
439 } // namespace test
440 } // namespace arm_compute
441