• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com )
2 
3    Based on original fortran 77 code from FFTPACKv4 from NETLIB,
4    authored by Dr Paul Swarztrauber of NCAR, in 1985.
5 
6    As confirmed by the NCAR fftpack software curators, the following
7    FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
8    released under the same terms.
9 
10    FFTPACK license:
11 
12    http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
13 
14    Copyright (c) 2004 the University Corporation for Atmospheric
15    Research ("UCAR"). All rights reserved. Developed by NCAR's
16    Computational and Information Systems Laboratory, UCAR,
17    www.cisl.ucar.edu.
18 
19    Redistribution and use of the Software in source and binary forms,
20    with or without modification, is permitted provided that the
21    following conditions are met:
22 
23    - Neither the names of NCAR's Computational and Information Systems
24    Laboratory, the University Corporation for Atmospheric Research,
25    nor the names of its sponsors or contributors may be used to
26    endorse or promote products derived from this Software without
27    specific prior written permission.
28 
29    - Redistributions of source code must retain the above copyright
30    notices, this list of conditions, and the disclaimer below.
31 
32    - Redistributions in binary form must reproduce the above copyright
33    notice, this list of conditions, and the disclaimer below in the
34    documentation and/or other materials provided with the
35    distribution.
36 
37    THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
38    EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
39    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
40    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
41    HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
42    EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
43    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
44    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
45    SOFTWARE.
46 */
47 
48 /*
49    PFFFT : a Pretty Fast FFT.
50 
51    This is basically an adaptation of the single precision fftpack
52    (v4) as found on netlib taking advantage of SIMD instruction found
53    on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON).
54 
55    For architectures where no SIMD instruction is available, the code
56    falls back to a scalar version.
57 
58    Restrictions:
59 
60    - 1D transforms only, with 32-bit single precision.
61 
62    - supports only transforms for inputs of length N of the form
63    N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128,
64    144, 160, etc are all acceptable lengths). Performance is best for
65    128<=N<=8192.
66 
67    - all (float*) pointers in the functions below are expected to
68    have an "simd-compatible" alignment, that is 16 bytes on x86 and
69    powerpc CPUs.
70 
71    You can allocate such buffers with the functions
72    pffft_aligned_malloc / pffft_aligned_free (or with stuff like
73    posix_memalign..)
74 
75 */
76 
77 #ifndef PFFFT_H
78 #define PFFFT_H
79 
80 #include <stddef.h> /* for size_t */
81 
82 #ifdef __cplusplus
83 extern "C" {
84 #endif
85 
86   /* opaque struct holding internal stuff (precomputed twiddle factors)
87      this struct can be shared by many threads as it contains only
88      read-only data.
89   */
90   typedef struct PFFFT_Setup PFFFT_Setup;
91 
92 #ifndef PFFFT_COMMON_ENUMS
93 #define PFFFT_COMMON_ENUMS
94 
95   /* direction of the transform */
96   typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t;
97 
98   /* type of transform */
99   typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t;
100 
101 #endif
102 
103   /*
104     prepare for performing transforms of size N -- the returned
105     PFFFT_Setup structure is read-only so it can safely be shared by
106     multiple concurrent threads.
107   */
108   PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform);
109   void pffft_destroy_setup(PFFFT_Setup *);
110   /*
111      Perform a Fourier transform , The z-domain data is stored in the
112      most efficient order for transforming it back, or using it for
113      convolution. If you need to have its content sorted in the
114      "usual" way, that is as an array of interleaved complex numbers,
115      either use pffft_transform_ordered , or call pffft_zreorder after
116      the forward fft, and before the backward fft.
117 
118      Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x.
119      Typically you will want to scale the backward transform by 1/N.
120 
121      The 'work' pointer should point to an area of N (2*N for complex
122      fft) floats, properly aligned. If 'work' is NULL, then stack will
123      be used instead (this is probably the best strategy for small
124      FFTs, say for N < 16384). Threads usually have a small stack, that
125      there's no sufficient amount of memory, usually leading to a crash!
126      Use the heap with pffft_aligned_malloc() in this case.
127 
128      For a real forward transform (PFFFT_REAL | PFFFT_FORWARD) with real
129      input with input(=transformation) length N, the output array is
130      'mostly' complex:
131        index k in 1 .. N/2 -1  corresponds to frequency k * Samplerate / N
132        index k == 0 is a special case:
133          the real() part contains the result for the DC frequency 0,
134          the imag() part contains the result for the Nyquist frequency Samplerate/2
135      both 0-frequency and half frequency components, which are real,
136      are assembled in the first entry as  F(0)+i*F(N/2).
137      With the output size N/2 complex values (=N real/imag values), it is
138      obvious, that the result for negative frequencies are not output,
139      cause of symmetry.
140 
141      input and output may alias.
142   */
143   void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
144 
145   /*
146      Similar to pffft_transform, but makes sure that the output is
147      ordered as expected (interleaved complex numbers).  This is
148      similar to calling pffft_transform and then pffft_zreorder.
149 
150      input and output may alias.
151   */
152   void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction);
153 
154   /*
155      call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(...,
156      PFFFT_FORWARD) if you want to have the frequency components in
157      the correct "canonical" order, as interleaved complex numbers.
158 
159      (for real transforms, both 0-frequency and half frequency
160      components, which are real, are assembled in the first entry as
161      F(0)+i*F(n/2+1). Note that the original fftpack did place
162      F(n/2+1) at the end of the arrays).
163 
164      input and output should not alias.
165   */
166   void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction);
167 
168   /*
169      Perform a multiplication of the frequency components of dft_a and
170      dft_b and accumulate them into dft_ab. The arrays should have
171      been obtained with pffft_transform(.., PFFFT_FORWARD) and should
172      *not* have been reordered with pffft_zreorder (otherwise just
173      perform the operation yourself as the dft coefs are stored as
174      interleaved complex numbers).
175 
176      the operation performed is: dft_ab += (dft_a * fdt_b)*scaling
177 
178      The dft_a, dft_b and dft_ab pointers may alias.
179   */
180   void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
181 
182   /*
183      Perform a multiplication of the frequency components of dft_a and
184      dft_b and put result in dft_ab. The arrays should have
185      been obtained with pffft_transform(.., PFFFT_FORWARD) and should
186      *not* have been reordered with pffft_zreorder (otherwise just
187      perform the operation yourself as the dft coefs are stored as
188      interleaved complex numbers).
189 
190      the operation performed is: dft_ab = (dft_a * fdt_b)*scaling
191 
192      The dft_a, dft_b and dft_ab pointers may alias.
193   */
194   void pffft_zconvolve_no_accu(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling);
195 
196   /* return 4 or 1 wether support SSE/NEON/Altivec instructions was enabled when building pffft.c */
197   int pffft_simd_size();
198 
199   /* return string identifier of used architecture (SSE/NEON/Altivec/..) */
200   const char * pffft_simd_arch();
201 
202 
203   /* following functions are identical to the pffftd_ functions */
204 
205   /* simple helper to get minimum possible fft size */
206   int pffft_min_fft_size(pffft_transform_t transform);
207 
208   /* simple helper to determine next power of 2
209      - without inexact/rounding floating point operations
210   */
211   int pffft_next_power_of_two(int N);
212 
213   /* simple helper to determine if power of 2 - returns bool */
214   int pffft_is_power_of_two(int N);
215 
216   /* simple helper to determine size N is valid
217      - factorizable to pffft_min_fft_size() with factors 2, 3, 5
218      returns bool
219   */
220   int pffft_is_valid_size(int N, pffft_transform_t cplx);
221 
222   /* determine nearest valid transform size  (by brute-force testing)
223      - factorizable to pffft_min_fft_size() with factors 2, 3, 5.
224      higher: bool-flag to find nearest higher value; else lower.
225   */
226   int pffft_nearest_transform_size(int N, pffft_transform_t cplx, int higher);
227 
228   /*
229     the float buffers must have the correct alignment (16-byte boundary
230     on intel and powerpc). This function may be used to obtain such
231     correctly aligned buffers.
232   */
233   void *pffft_aligned_malloc(size_t nb_bytes);
234   void pffft_aligned_free(void *);
235 
236 #ifdef __cplusplus
237 }
238 #endif
239 
240 #endif /* PFFFT_H */
241 
242