1 /* Copyright (c) 2013 Julien Pommier ( pommier@modartt.com ) 2 3 Based on original fortran 77 code from FFTPACKv4 from NETLIB, 4 authored by Dr Paul Swarztrauber of NCAR, in 1985. 5 6 As confirmed by the NCAR fftpack software curators, the following 7 FFTPACKv5 license applies to FFTPACKv4 sources. My changes are 8 released under the same terms. 9 10 FFTPACK license: 11 12 http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html 13 14 Copyright (c) 2004 the University Corporation for Atmospheric 15 Research ("UCAR"). All rights reserved. Developed by NCAR's 16 Computational and Information Systems Laboratory, UCAR, 17 www.cisl.ucar.edu. 18 19 Redistribution and use of the Software in source and binary forms, 20 with or without modification, is permitted provided that the 21 following conditions are met: 22 23 - Neither the names of NCAR's Computational and Information Systems 24 Laboratory, the University Corporation for Atmospheric Research, 25 nor the names of its sponsors or contributors may be used to 26 endorse or promote products derived from this Software without 27 specific prior written permission. 28 29 - Redistributions of source code must retain the above copyright 30 notices, this list of conditions, and the disclaimer below. 31 32 - Redistributions in binary form must reproduce the above copyright 33 notice, this list of conditions, and the disclaimer below in the 34 documentation and/or other materials provided with the 35 distribution. 36 37 THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 38 EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF 39 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 40 NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT 41 HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, 42 EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN 43 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 44 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 45 SOFTWARE. 46 */ 47 48 /* 49 PFFFT : a Pretty Fast FFT. 50 51 This is basically an adaptation of the single precision fftpack 52 (v4) as found on netlib taking advantage of SIMD instruction found 53 on cpus such as intel x86 (SSE1), powerpc (Altivec), and arm (NEON). 54 55 For architectures where no SIMD instruction is available, the code 56 falls back to a scalar version. 57 58 Restrictions: 59 60 - 1D transforms only, with 32-bit single precision. 61 62 - supports only transforms for inputs of length N of the form 63 N=(2^a)*(3^b)*(5^c), a >= 5, b >=0, c >= 0 (32, 48, 64, 96, 128, 64 144, 160, etc are all acceptable lengths). Performance is best for 65 128<=N<=8192. 66 67 - all (float*) pointers in the functions below are expected to 68 have an "simd-compatible" alignment, that is 16 bytes on x86 and 69 powerpc CPUs. 70 71 You can allocate such buffers with the functions 72 pffft_aligned_malloc / pffft_aligned_free (or with stuff like 73 posix_memalign..) 74 75 */ 76 77 #ifndef PFFFT_H 78 #define PFFFT_H 79 80 #include <stddef.h> /* for size_t */ 81 82 #ifdef __cplusplus 83 extern "C" { 84 #endif 85 86 /* opaque struct holding internal stuff (precomputed twiddle factors) 87 this struct can be shared by many threads as it contains only 88 read-only data. 89 */ 90 typedef struct PFFFT_Setup PFFFT_Setup; 91 92 #ifndef PFFFT_COMMON_ENUMS 93 #define PFFFT_COMMON_ENUMS 94 95 /* direction of the transform */ 96 typedef enum { PFFFT_FORWARD, PFFFT_BACKWARD } pffft_direction_t; 97 98 /* type of transform */ 99 typedef enum { PFFFT_REAL, PFFFT_COMPLEX } pffft_transform_t; 100 101 #endif 102 103 /* 104 prepare for performing transforms of size N -- the returned 105 PFFFT_Setup structure is read-only so it can safely be shared by 106 multiple concurrent threads. 107 */ 108 PFFFT_Setup *pffft_new_setup(int N, pffft_transform_t transform); 109 void pffft_destroy_setup(PFFFT_Setup *); 110 /* 111 Perform a Fourier transform , The z-domain data is stored in the 112 most efficient order for transforming it back, or using it for 113 convolution. If you need to have its content sorted in the 114 "usual" way, that is as an array of interleaved complex numbers, 115 either use pffft_transform_ordered , or call pffft_zreorder after 116 the forward fft, and before the backward fft. 117 118 Transforms are not scaled: PFFFT_BACKWARD(PFFFT_FORWARD(x)) = N*x. 119 Typically you will want to scale the backward transform by 1/N. 120 121 The 'work' pointer should point to an area of N (2*N for complex 122 fft) floats, properly aligned. If 'work' is NULL, then stack will 123 be used instead (this is probably the best strategy for small 124 FFTs, say for N < 16384). Threads usually have a small stack, that 125 there's no sufficient amount of memory, usually leading to a crash! 126 Use the heap with pffft_aligned_malloc() in this case. 127 128 For a real forward transform (PFFFT_REAL | PFFFT_FORWARD) with real 129 input with input(=transformation) length N, the output array is 130 'mostly' complex: 131 index k in 1 .. N/2 -1 corresponds to frequency k * Samplerate / N 132 index k == 0 is a special case: 133 the real() part contains the result for the DC frequency 0, 134 the imag() part contains the result for the Nyquist frequency Samplerate/2 135 both 0-frequency and half frequency components, which are real, 136 are assembled in the first entry as F(0)+i*F(N/2). 137 With the output size N/2 complex values (=N real/imag values), it is 138 obvious, that the result for negative frequencies are not output, 139 cause of symmetry. 140 141 input and output may alias. 142 */ 143 void pffft_transform(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); 144 145 /* 146 Similar to pffft_transform, but makes sure that the output is 147 ordered as expected (interleaved complex numbers). This is 148 similar to calling pffft_transform and then pffft_zreorder. 149 150 input and output may alias. 151 */ 152 void pffft_transform_ordered(PFFFT_Setup *setup, const float *input, float *output, float *work, pffft_direction_t direction); 153 154 /* 155 call pffft_zreorder(.., PFFFT_FORWARD) after pffft_transform(..., 156 PFFFT_FORWARD) if you want to have the frequency components in 157 the correct "canonical" order, as interleaved complex numbers. 158 159 (for real transforms, both 0-frequency and half frequency 160 components, which are real, are assembled in the first entry as 161 F(0)+i*F(n/2+1). Note that the original fftpack did place 162 F(n/2+1) at the end of the arrays). 163 164 input and output should not alias. 165 */ 166 void pffft_zreorder(PFFFT_Setup *setup, const float *input, float *output, pffft_direction_t direction); 167 168 /* 169 Perform a multiplication of the frequency components of dft_a and 170 dft_b and accumulate them into dft_ab. The arrays should have 171 been obtained with pffft_transform(.., PFFFT_FORWARD) and should 172 *not* have been reordered with pffft_zreorder (otherwise just 173 perform the operation yourself as the dft coefs are stored as 174 interleaved complex numbers). 175 176 the operation performed is: dft_ab += (dft_a * fdt_b)*scaling 177 178 The dft_a, dft_b and dft_ab pointers may alias. 179 */ 180 void pffft_zconvolve_accumulate(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); 181 182 /* 183 Perform a multiplication of the frequency components of dft_a and 184 dft_b and put result in dft_ab. The arrays should have 185 been obtained with pffft_transform(.., PFFFT_FORWARD) and should 186 *not* have been reordered with pffft_zreorder (otherwise just 187 perform the operation yourself as the dft coefs are stored as 188 interleaved complex numbers). 189 190 the operation performed is: dft_ab = (dft_a * fdt_b)*scaling 191 192 The dft_a, dft_b and dft_ab pointers may alias. 193 */ 194 void pffft_zconvolve_no_accu(PFFFT_Setup *setup, const float *dft_a, const float *dft_b, float *dft_ab, float scaling); 195 196 /* return 4 or 1 wether support SSE/NEON/Altivec instructions was enabled when building pffft.c */ 197 int pffft_simd_size(); 198 199 /* return string identifier of used architecture (SSE/NEON/Altivec/..) */ 200 const char * pffft_simd_arch(); 201 202 203 /* following functions are identical to the pffftd_ functions */ 204 205 /* simple helper to get minimum possible fft size */ 206 int pffft_min_fft_size(pffft_transform_t transform); 207 208 /* simple helper to determine next power of 2 209 - without inexact/rounding floating point operations 210 */ 211 int pffft_next_power_of_two(int N); 212 213 /* simple helper to determine if power of 2 - returns bool */ 214 int pffft_is_power_of_two(int N); 215 216 /* simple helper to determine size N is valid 217 - factorizable to pffft_min_fft_size() with factors 2, 3, 5 218 returns bool 219 */ 220 int pffft_is_valid_size(int N, pffft_transform_t cplx); 221 222 /* determine nearest valid transform size (by brute-force testing) 223 - factorizable to pffft_min_fft_size() with factors 2, 3, 5. 224 higher: bool-flag to find nearest higher value; else lower. 225 */ 226 int pffft_nearest_transform_size(int N, pffft_transform_t cplx, int higher); 227 228 /* 229 the float buffers must have the correct alignment (16-byte boundary 230 on intel and powerpc). This function may be used to obtain such 231 correctly aligned buffers. 232 */ 233 void *pffft_aligned_malloc(size_t nb_bytes); 234 void pffft_aligned_free(void *); 235 236 #ifdef __cplusplus 237 } 238 #endif 239 240 #endif /* PFFFT_H */ 241 242