1 /*
2 Copyright (c) 2003-2010, Mark Borgerding
3
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 */
14
15
16 #include "_kiss_fft_guts.h"
17 /* The guts header contains all the multiplication and addition macros that are defined for
18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
19 */
20
21 // CHRE modifications begin
22 #if defined(__clang__)
23 #pragma clang diagnostic push
24 #pragma clang diagnostic ignored "-Wcast-align"
25 #pragma clang diagnostic ignored "-Wbad-function-cast"
26 #endif
27 // CHRE modifications end
28
kf_bfly2(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)29 static void kf_bfly2(
30 kiss_fft_cpx * Fout,
31 const size_t fstride,
32 const kiss_fft_cfg st,
33 int m
34 )
35 {
36 kiss_fft_cpx * Fout2;
37 kiss_fft_cpx * tw1 = st->twiddles;
38 kiss_fft_cpx t;
39 Fout2 = Fout + m;
40 do{
41 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
42
43 C_MUL (t, *Fout2 , *tw1);
44 tw1 += fstride;
45 C_SUB( *Fout2 , *Fout , t );
46 C_ADDTO( *Fout , t );
47 ++Fout2;
48 ++Fout;
49 }while (--m);
50 }
51
kf_bfly4(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,const size_t m)52 static void kf_bfly4(
53 kiss_fft_cpx * Fout,
54 const size_t fstride,
55 const kiss_fft_cfg st,
56 const size_t m
57 )
58 {
59 kiss_fft_cpx *tw1,*tw2,*tw3;
60 kiss_fft_cpx scratch[6];
61 size_t k=m;
62 const size_t m2=2*m;
63 const size_t m3=3*m;
64
65
66 tw3 = tw2 = tw1 = st->twiddles;
67
68 do {
69 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
70
71 C_MUL(scratch[0],Fout[m] , *tw1 );
72 C_MUL(scratch[1],Fout[m2] , *tw2 );
73 C_MUL(scratch[2],Fout[m3] , *tw3 );
74
75 C_SUB( scratch[5] , *Fout, scratch[1] );
76 C_ADDTO(*Fout, scratch[1]);
77 C_ADD( scratch[3] , scratch[0] , scratch[2] );
78 C_SUB( scratch[4] , scratch[0] , scratch[2] );
79 C_SUB( Fout[m2], *Fout, scratch[3] );
80 tw1 += fstride;
81 tw2 += fstride*2;
82 tw3 += fstride*3;
83 C_ADDTO( *Fout , scratch[3] );
84
85 if(st->inverse) {
86 Fout[m].r = scratch[5].r - scratch[4].i;
87 Fout[m].i = scratch[5].i + scratch[4].r;
88 Fout[m3].r = scratch[5].r + scratch[4].i;
89 Fout[m3].i = scratch[5].i - scratch[4].r;
90 }else{
91 Fout[m].r = scratch[5].r + scratch[4].i;
92 Fout[m].i = scratch[5].i - scratch[4].r;
93 Fout[m3].r = scratch[5].r - scratch[4].i;
94 Fout[m3].i = scratch[5].i + scratch[4].r;
95 }
96 ++Fout;
97 }while(--k);
98 }
99
kf_bfly3(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,size_t m)100 static void kf_bfly3(
101 kiss_fft_cpx * Fout,
102 const size_t fstride,
103 const kiss_fft_cfg st,
104 size_t m
105 )
106 {
107 size_t k=m;
108 const size_t m2 = 2*m;
109 kiss_fft_cpx *tw1,*tw2;
110 kiss_fft_cpx scratch[5];
111 kiss_fft_cpx epi3;
112 epi3 = st->twiddles[fstride*m];
113
114 tw1=tw2=st->twiddles;
115
116 do{
117 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
118
119 C_MUL(scratch[1],Fout[m] , *tw1);
120 C_MUL(scratch[2],Fout[m2] , *tw2);
121
122 C_ADD(scratch[3],scratch[1],scratch[2]);
123 C_SUB(scratch[0],scratch[1],scratch[2]);
124 tw1 += fstride;
125 tw2 += fstride*2;
126
127 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
128 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
129
130 C_MULBYSCALAR( scratch[0] , epi3.i );
131
132 C_ADDTO(*Fout,scratch[3]);
133
134 Fout[m2].r = Fout[m].r + scratch[0].i;
135 Fout[m2].i = Fout[m].i - scratch[0].r;
136
137 Fout[m].r -= scratch[0].i;
138 Fout[m].i += scratch[0].r;
139
140 ++Fout;
141 }while(--k);
142 }
143
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)144 static void kf_bfly5(
145 kiss_fft_cpx * Fout,
146 const size_t fstride,
147 const kiss_fft_cfg st,
148 int m
149 )
150 {
151 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
152 size_t u;
153 kiss_fft_cpx scratch[13];
154 kiss_fft_cpx * twiddles = st->twiddles;
155 kiss_fft_cpx *tw;
156 kiss_fft_cpx ya,yb;
157 ya = twiddles[fstride*(size_t)m];
158 yb = twiddles[fstride*2*(size_t)m];
159
160 Fout0=Fout;
161 Fout1=Fout0+m;
162 Fout2=Fout0+2*m;
163 Fout3=Fout0+3*m;
164 Fout4=Fout0+4*m;
165
166 tw=st->twiddles;
167 for ( u=0; u<(size_t)m; ++u ) {
168 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
169 scratch[0] = *Fout0;
170
171 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
172 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
173 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
174 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
175
176 C_ADD( scratch[7],scratch[1],scratch[4]);
177 C_SUB( scratch[10],scratch[1],scratch[4]);
178 C_ADD( scratch[8],scratch[2],scratch[3]);
179 C_SUB( scratch[9],scratch[2],scratch[3]);
180
181 Fout0->r += scratch[7].r + scratch[8].r;
182 Fout0->i += scratch[7].i + scratch[8].i;
183
184 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
185 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
186
187 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
188 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
189
190 C_SUB(*Fout1,scratch[5],scratch[6]);
191 C_ADD(*Fout4,scratch[5],scratch[6]);
192
193 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
194 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
195 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
196 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
197
198 C_ADD(*Fout2,scratch[11],scratch[12]);
199 C_SUB(*Fout3,scratch[11],scratch[12]);
200
201 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
202 }
203 }
204
205 /* perform the butterfly for one stage of a mixed radix FFT */
kf_bfly_generic(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m,int p)206 static void kf_bfly_generic(
207 kiss_fft_cpx * Fout,
208 const size_t fstride,
209 const kiss_fft_cfg st,
210 int m,
211 int p
212 )
213 {
214 int u,k,q1,q;
215 kiss_fft_cpx * twiddles = st->twiddles;
216 kiss_fft_cpx t;
217 int Norig = st->nfft;
218
219 kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*(size_t)p);
220
221 for ( u=0; u<m; ++u ) {
222 k=u;
223 for ( q1=0 ; q1<p ; ++q1 ) {
224 scratch[q1] = Fout[ k ];
225 C_FIXDIV(scratch[q1],p);
226 k += m;
227 }
228
229 k=u;
230 for ( q1=0 ; q1<p ; ++q1 ) {
231 int twidx=0;
232 Fout[ k ] = scratch[0];
233 for (q=1;q<p;++q ) {
234 twidx += fstride * (size_t)k;
235 if (twidx>=Norig) twidx-=Norig;
236 C_MUL(t,scratch[q] , twiddles[twidx] );
237 C_ADDTO( Fout[ k ] ,t);
238 }
239 k += m;
240 }
241 }
242 KISS_FFT_TMP_FREE(scratch);
243 }
244
245 static
kf_work(kiss_fft_cpx * Fout,const kiss_fft_cpx * f,const size_t fstride,int in_stride,int * factors,const kiss_fft_cfg st)246 void kf_work(
247 kiss_fft_cpx * Fout,
248 const kiss_fft_cpx * f,
249 const size_t fstride,
250 int in_stride,
251 int * factors,
252 const kiss_fft_cfg st
253 )
254 {
255 kiss_fft_cpx * Fout_beg=Fout;
256 const int p=*factors++; /* the radix */
257 const int m=*factors++; /* stage's fft length/p */
258 const kiss_fft_cpx * Fout_end = Fout + p*m;
259
260 #ifdef _OPENMP
261 // use openmp extensions at the
262 // top-level (not recursive)
263 if (fstride==1 && p<=5)
264 {
265 int k;
266
267 // execute the p different work units in different threads
268 # pragma omp parallel for
269 for (k=0;k<p;++k)
270 kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
271 // all threads have joined by this point
272
273 switch (p) {
274 case 2: kf_bfly2(Fout,fstride,st,m); break;
275 case 3: kf_bfly3(Fout,fstride,st,m); break;
276 case 4: kf_bfly4(Fout,fstride,st,m); break;
277 case 5: kf_bfly5(Fout,fstride,st,m); break;
278 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
279 }
280 return;
281 }
282 #endif
283
284 if (m==1) {
285 do{
286 *Fout = *f;
287 f += fstride*(size_t)in_stride;
288 }while(++Fout != Fout_end );
289 }else{
290 do{
291 // recursive call:
292 // DFT of size m*p performed by doing
293 // p instances of smaller DFTs of size m,
294 // each one takes a decimated version of the input
295 kf_work( Fout , f, fstride*(size_t)p, in_stride, factors,st);
296 f += fstride*(size_t)in_stride;
297 }while( (Fout += m) != Fout_end );
298 }
299
300 Fout=Fout_beg;
301
302 // recombine the p smaller DFTs
303 switch (p) {
304 case 2: kf_bfly2(Fout,fstride,st,m); break;
305 case 3: kf_bfly3(Fout,fstride,st,(size_t)m); break;
306 case 4: kf_bfly4(Fout,fstride,st,(size_t)m); break;
307 case 5: kf_bfly5(Fout,fstride,st,m); break;
308 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
309 }
310 }
311
312 /* facbuf is populated by p1,m1,p2,m2, ...
313 where
314 p[i] * m[i] = m[i-1]
315 m0 = n */
316 static
kf_factor(int n,int * facbuf)317 void kf_factor(int n,int * facbuf)
318 {
319 int p=4;
320 double floor_sqrt;
321 floor_sqrt = floor( sqrt((double)n) );
322
323 /*factor out powers of 4, powers of 2, then any remaining primes */
324 do {
325 while (n % p) {
326 switch (p) {
327 case 4: p = 2; break;
328 case 2: p = 3; break;
329 default: p += 2; break;
330 }
331 if (p > floor_sqrt)
332 p = n; /* no more factors, skip to end */
333 }
334 n /= p;
335 *facbuf++ = p;
336 *facbuf++ = n;
337 } while (n > 1);
338 }
339
340 /*
341 *
342 * User-callable function to allocate all necessary storage space for the fft.
343 *
344 * The return value is a contiguous block of memory, allocated with malloc. As such,
345 * It can be freed with free(), rather than a kiss_fft-specific function.
346 * */
kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)347 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
348 {
349 kiss_fft_cfg st=NULL;
350 size_t memneeded = sizeof(struct kiss_fft_state)
351 + sizeof(kiss_fft_cpx)*(size_t)(nfft-1); /* twiddle factors*/
352
353 if ( lenmem==NULL ) {
354 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
355 }else{
356 if (mem != NULL && *lenmem >= memneeded)
357 st = (kiss_fft_cfg)mem;
358 *lenmem = memneeded;
359 }
360 if (st) {
361 int i;
362 st->nfft=nfft;
363 st->inverse = inverse_fft;
364
365 for (i=0;i<nfft;++i) {
366 const double pi=3.141592653589793238462643383279502884197169399375105820974944;
367 double phase = -2*pi*i / nfft;
368 if (st->inverse)
369 phase *= -1;
370 kf_cexp(st->twiddles+i, phase );
371 }
372
373 kf_factor(nfft,st->factors);
374 }
375 return st;
376 }
377
378
kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout,int in_stride)379 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
380 {
381 if (fin == fout) {
382 //NOTE: this is not really an in-place FFT algorithm.
383 //It just performs an out-of-place FFT into a temp buffer
384 kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*(size_t)st->nfft);
385 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
386 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*(size_t)(st->nfft));
387 KISS_FFT_TMP_FREE(tmpbuf);
388 }else{
389 kf_work( fout, fin, 1,in_stride, st->factors,st );
390 }
391 }
392
kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)393 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
394 {
395 kiss_fft_stride(cfg,fin,fout,1);
396 }
397
398
kiss_fft_cleanup(void)399 void kiss_fft_cleanup(void)
400 {
401 // nothing needed any more
402 }
403
kiss_fft_next_fast_size(int n)404 int kiss_fft_next_fast_size(int n)
405 {
406 while(1) {
407 int m=n;
408 while ( (m%2) == 0 ) m/=2;
409 while ( (m%3) == 0 ) m/=3;
410 while ( (m%5) == 0 ) m/=5;
411 if (m<=1)
412 break; /* n is completely factorable by twos, threes, and fives */
413 n++;
414 }
415 return n;
416 }
417
418 // CHRE modifications begin
419 #if defined(__clang__)
420 #pragma clang diagnostic pop
421 #endif
422 // CHRE modifications end
423