• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
3  *  This file is part of KISS FFT - https://github.com/mborgerding/kissfft
4  *
5  *  SPDX-License-Identifier: BSD-3-Clause
6  *  See COPYING file for more information.
7  */
8 
9 
10 #include "_kiss_fft_guts_s32.h"
11 /* The guts header contains all the multiplication and addition macros that are defined for
12  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
13  */
14 
15 static void
kf_bfly2(kiss_fft_s32_cpx * Fout,const size_t fstride,const kiss_fft_s32_cfg st,int m)16 kf_bfly2 (kiss_fft_s32_cpx * Fout,
17     const size_t fstride, const kiss_fft_s32_cfg st, int m)
18 {
19   kiss_fft_s32_cpx *Fout2;
20   kiss_fft_s32_cpx *tw1 = st->twiddles;
21   kiss_fft_s32_cpx t;
22   Fout2 = Fout + m;
23   do {
24     C_FIXDIV (*Fout, 2);
25     C_FIXDIV (*Fout2, 2);
26 
27     C_MUL (t, *Fout2, *tw1);
28     tw1 += fstride;
29     C_SUB (*Fout2, *Fout, t);
30     C_ADDTO (*Fout, t);
31     ++Fout2;
32     ++Fout;
33   } while (--m);
34 }
35 
36 static void
kf_bfly4(kiss_fft_s32_cpx * Fout,const size_t fstride,const kiss_fft_s32_cfg st,const size_t m)37 kf_bfly4 (kiss_fft_s32_cpx * Fout,
38     const size_t fstride, const kiss_fft_s32_cfg st, const size_t m)
39 {
40   kiss_fft_s32_cpx *tw1, *tw2, *tw3;
41   kiss_fft_s32_cpx scratch[6];
42   size_t k = m;
43   const size_t m2 = 2 * m;
44   const size_t m3 = 3 * m;
45 
46 
47   tw3 = tw2 = tw1 = st->twiddles;
48 
49   do {
50     C_FIXDIV (*Fout, 4);
51     C_FIXDIV (Fout[m], 4);
52     C_FIXDIV (Fout[m2], 4);
53     C_FIXDIV (Fout[m3], 4);
54 
55     C_MUL (scratch[0], Fout[m], *tw1);
56     C_MUL (scratch[1], Fout[m2], *tw2);
57     C_MUL (scratch[2], Fout[m3], *tw3);
58 
59     C_SUB (scratch[5], *Fout, scratch[1]);
60     C_ADDTO (*Fout, scratch[1]);
61     C_ADD (scratch[3], scratch[0], scratch[2]);
62     C_SUB (scratch[4], scratch[0], scratch[2]);
63     C_SUB (Fout[m2], *Fout, scratch[3]);
64     tw1 += fstride;
65     tw2 += fstride * 2;
66     tw3 += fstride * 3;
67     C_ADDTO (*Fout, scratch[3]);
68 
69     if (st->inverse) {
70       Fout[m].r = scratch[5].r - scratch[4].i;
71       Fout[m].i = scratch[5].i + scratch[4].r;
72       Fout[m3].r = scratch[5].r + scratch[4].i;
73       Fout[m3].i = scratch[5].i - scratch[4].r;
74     } else {
75       Fout[m].r = scratch[5].r + scratch[4].i;
76       Fout[m].i = scratch[5].i - scratch[4].r;
77       Fout[m3].r = scratch[5].r - scratch[4].i;
78       Fout[m3].i = scratch[5].i + scratch[4].r;
79     }
80     ++Fout;
81   } while (--k);
82 }
83 
84 static void
kf_bfly3(kiss_fft_s32_cpx * Fout,const size_t fstride,const kiss_fft_s32_cfg st,size_t m)85 kf_bfly3 (kiss_fft_s32_cpx * Fout,
86     const size_t fstride, const kiss_fft_s32_cfg st, size_t m)
87 {
88   size_t k = m;
89   const size_t m2 = 2 * m;
90   kiss_fft_s32_cpx *tw1, *tw2;
91   kiss_fft_s32_cpx scratch[5];
92   kiss_fft_s32_cpx epi3;
93   epi3 = st->twiddles[fstride * m];
94 
95   tw1 = tw2 = st->twiddles;
96 
97   do {
98     C_FIXDIV (*Fout, 3);
99     C_FIXDIV (Fout[m], 3);
100     C_FIXDIV (Fout[m2], 3);
101 
102     C_MUL (scratch[1], Fout[m], *tw1);
103     C_MUL (scratch[2], Fout[m2], *tw2);
104 
105     C_ADD (scratch[3], scratch[1], scratch[2]);
106     C_SUB (scratch[0], scratch[1], scratch[2]);
107     tw1 += fstride;
108     tw2 += fstride * 2;
109 
110     Fout[m].r = Fout->r - HALF_OF (scratch[3].r);
111     Fout[m].i = Fout->i - HALF_OF (scratch[3].i);
112 
113     C_MULBYSCALAR (scratch[0], epi3.i);
114 
115     C_ADDTO (*Fout, scratch[3]);
116 
117     Fout[m2].r = Fout[m].r + scratch[0].i;
118     Fout[m2].i = Fout[m].i - scratch[0].r;
119 
120     Fout[m].r -= scratch[0].i;
121     Fout[m].i += scratch[0].r;
122 
123     ++Fout;
124   } while (--k);
125 }
126 
127 static void
kf_bfly5(kiss_fft_s32_cpx * Fout,const size_t fstride,const kiss_fft_s32_cfg st,int m)128 kf_bfly5 (kiss_fft_s32_cpx * Fout,
129     const size_t fstride, const kiss_fft_s32_cfg st, int m)
130 {
131   kiss_fft_s32_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
132   int u;
133   kiss_fft_s32_cpx scratch[13];
134   kiss_fft_s32_cpx *twiddles = st->twiddles;
135   kiss_fft_s32_cpx *tw;
136   kiss_fft_s32_cpx ya, yb;
137   ya = twiddles[fstride * m];
138   yb = twiddles[fstride * 2 * m];
139 
140   Fout0 = Fout;
141   Fout1 = Fout0 + m;
142   Fout2 = Fout0 + 2 * m;
143   Fout3 = Fout0 + 3 * m;
144   Fout4 = Fout0 + 4 * m;
145 
146   tw = st->twiddles;
147   for (u = 0; u < m; ++u) {
148     C_FIXDIV (*Fout0, 5);
149     C_FIXDIV (*Fout1, 5);
150     C_FIXDIV (*Fout2, 5);
151     C_FIXDIV (*Fout3, 5);
152     C_FIXDIV (*Fout4, 5);
153     scratch[0] = *Fout0;
154 
155     C_MUL (scratch[1], *Fout1, tw[u * fstride]);
156     C_MUL (scratch[2], *Fout2, tw[2 * u * fstride]);
157     C_MUL (scratch[3], *Fout3, tw[3 * u * fstride]);
158     C_MUL (scratch[4], *Fout4, tw[4 * u * fstride]);
159 
160     C_ADD (scratch[7], scratch[1], scratch[4]);
161     C_SUB (scratch[10], scratch[1], scratch[4]);
162     C_ADD (scratch[8], scratch[2], scratch[3]);
163     C_SUB (scratch[9], scratch[2], scratch[3]);
164 
165     Fout0->r += scratch[7].r + scratch[8].r;
166     Fout0->i += scratch[7].i + scratch[8].i;
167 
168     scratch[5].r =
169         scratch[0].r + S_MUL (scratch[7].r, ya.r) + S_MUL (scratch[8].r, yb.r);
170     scratch[5].i =
171         scratch[0].i + S_MUL (scratch[7].i, ya.r) + S_MUL (scratch[8].i, yb.r);
172 
173     scratch[6].r = S_MUL (scratch[10].i, ya.i) + S_MUL (scratch[9].i, yb.i);
174     scratch[6].i = -S_MUL (scratch[10].r, ya.i) - S_MUL (scratch[9].r, yb.i);
175 
176     C_SUB (*Fout1, scratch[5], scratch[6]);
177     C_ADD (*Fout4, scratch[5], scratch[6]);
178 
179     scratch[11].r =
180         scratch[0].r + S_MUL (scratch[7].r, yb.r) + S_MUL (scratch[8].r, ya.r);
181     scratch[11].i =
182         scratch[0].i + S_MUL (scratch[7].i, yb.r) + S_MUL (scratch[8].i, ya.r);
183     scratch[12].r = -S_MUL (scratch[10].i, yb.i) + S_MUL (scratch[9].i, ya.i);
184     scratch[12].i = S_MUL (scratch[10].r, yb.i) - S_MUL (scratch[9].r, ya.i);
185 
186     C_ADD (*Fout2, scratch[11], scratch[12]);
187     C_SUB (*Fout3, scratch[11], scratch[12]);
188 
189     ++Fout0;
190     ++Fout1;
191     ++Fout2;
192     ++Fout3;
193     ++Fout4;
194   }
195 }
196 
197 /* perform the butterfly for one stage of a mixed radix FFT */
198 static void
kf_bfly_generic(kiss_fft_s32_cpx * Fout,const size_t fstride,const kiss_fft_s32_cfg st,int m,int p)199 kf_bfly_generic (kiss_fft_s32_cpx * Fout,
200     const size_t fstride, const kiss_fft_s32_cfg st, int m, int p)
201 {
202   int u, k, q1, q;
203   kiss_fft_s32_cpx *twiddles = st->twiddles;
204   kiss_fft_s32_cpx t;
205   int Norig = st->nfft;
206 
207   kiss_fft_s32_cpx *scratch =
208       (kiss_fft_s32_cpx *) KISS_FFT_S32_TMP_ALLOC (sizeof (kiss_fft_s32_cpx) *
209       p);
210 
211   for (u = 0; u < m; ++u) {
212     k = u;
213     for (q1 = 0; q1 < p; ++q1) {
214       scratch[q1] = Fout[k];
215       C_FIXDIV (scratch[q1], p);
216       k += m;
217     }
218 
219     k = u;
220     for (q1 = 0; q1 < p; ++q1) {
221       int twidx = 0;
222       Fout[k] = scratch[0];
223       for (q = 1; q < p; ++q) {
224         twidx += fstride * k;
225         if (twidx >= Norig)
226           twidx -= Norig;
227         C_MUL (t, scratch[q], twiddles[twidx]);
228         C_ADDTO (Fout[k], t);
229       }
230       k += m;
231     }
232   }
233   KISS_FFT_S32_TMP_FREE (scratch);
234 }
235 
236 static void
kf_work(kiss_fft_s32_cpx * Fout,const kiss_fft_s32_cpx * f,const size_t fstride,int in_stride,int * factors,const kiss_fft_s32_cfg st)237 kf_work (kiss_fft_s32_cpx * Fout,
238     const kiss_fft_s32_cpx * f,
239     const size_t fstride, int in_stride, int *factors,
240     const kiss_fft_s32_cfg st)
241 {
242   kiss_fft_s32_cpx *Fout_beg = Fout;
243   const int p = *factors++;     /* the radix  */
244   const int m = *factors++;     /* stage's fft length/p */
245   const kiss_fft_s32_cpx *Fout_end = Fout + p * m;
246 
247 #ifdef _OPENMP
248   // use openmp extensions at the
249   // top-level (not recursive)
250   if (fstride == 1 && p <= 5 && m != 1) {
251     int k;
252 
253     // execute the p different work units in different threads
254 #       pragma omp parallel for
255     for (k = 0; k < p; ++k)
256       kf_work (Fout + k * m, f + fstride * in_stride * k, fstride * p,
257           in_stride, factors, st);
258     // all threads have joined by this point
259 
260     switch (p) {
261       case 2:
262         kf_bfly2 (Fout, fstride, st, m);
263         break;
264       case 3:
265         kf_bfly3 (Fout, fstride, st, m);
266         break;
267       case 4:
268         kf_bfly4 (Fout, fstride, st, m);
269         break;
270       case 5:
271         kf_bfly5 (Fout, fstride, st, m);
272         break;
273       default:
274         kf_bfly_generic (Fout, fstride, st, m, p);
275         break;
276     }
277     return;
278   }
279 #endif
280 
281   if (m == 1) {
282     do {
283       *Fout = *f;
284       f += fstride * in_stride;
285     } while (++Fout != Fout_end);
286   } else {
287     do {
288       // recursive call:
289       // DFT of size m*p performed by doing
290       // p instances of smaller DFTs of size m,
291       // each one takes a decimated version of the input
292       kf_work (Fout, f, fstride * p, in_stride, factors, st);
293       f += fstride * in_stride;
294     } while ((Fout += m) != Fout_end);
295   }
296 
297   Fout = Fout_beg;
298 
299   // recombine the p smaller DFTs
300   switch (p) {
301     case 2:
302       kf_bfly2 (Fout, fstride, st, m);
303       break;
304     case 3:
305       kf_bfly3 (Fout, fstride, st, m);
306       break;
307     case 4:
308       kf_bfly4 (Fout, fstride, st, m);
309       break;
310     case 5:
311       kf_bfly5 (Fout, fstride, st, m);
312       break;
313     default:
314       kf_bfly_generic (Fout, fstride, st, m, p);
315       break;
316   }
317 }
318 
319 /*  facbuf is populated by p1,m1,p2,m2, ...
320     where
321     p[i] * m[i] = m[i-1]
322     m0 = n                  */
323 static void
kf_factor(int n,int * facbuf)324 kf_factor (int n, int *facbuf)
325 {
326   int p = 4;
327   double floor_sqrt;
328   floor_sqrt = floor (sqrt ((double) n));
329 
330   /*factor out powers of 4, powers of 2, then any remaining primes */
331   do {
332     while (n % p) {
333       switch (p) {
334         case 4:
335           p = 2;
336           break;
337         case 2:
338           p = 3;
339           break;
340         default:
341           p += 2;
342           break;
343       }
344       if (p > floor_sqrt)
345         p = n;                  /* no more factors, skip to end */
346     }
347     n /= p;
348     *facbuf++ = p;
349     *facbuf++ = n;
350   } while (n > 1);
351 }
352 
353 /*
354  *
355  * User-callable function to allocate all necessary storage space for the fft.
356  *
357  * The return value is a contiguous block of memory, allocated with malloc.  As such,
358  * It can be freed with free(), rather than a kiss_fft_s32-specific function.
359  * */
360 kiss_fft_s32_cfg
kiss_fft_s32_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)361 kiss_fft_s32_alloc (int nfft, int inverse_fft, void *mem, size_t * lenmem)
362 {
363   kiss_fft_s32_cfg st = NULL;
364   size_t memneeded = sizeof (struct kiss_fft_s32_state)
365       + sizeof (kiss_fft_s32_cpx) * (nfft - 1); /* twiddle factors */
366 
367   if (lenmem == NULL) {
368     st = (kiss_fft_s32_cfg) KISS_FFT_S32_MALLOC (memneeded);
369   } else {
370     if (mem != NULL && *lenmem >= memneeded)
371       st = (kiss_fft_s32_cfg) mem;
372     *lenmem = memneeded;
373   }
374   if (st) {
375     int i;
376     st->nfft = nfft;
377     st->inverse = inverse_fft;
378 
379     for (i = 0; i < nfft; ++i) {
380       const double pi =
381           3.141592653589793238462643383279502884197169399375105820974944;
382       double phase = -2 * pi * i / nfft;
383       if (st->inverse)
384         phase *= -1;
385       kf_cexp (st->twiddles + i, phase);
386     }
387 
388     kf_factor (nfft, st->factors);
389   }
390   return st;
391 }
392 
393 
394 void
kiss_fft_s32_stride(kiss_fft_s32_cfg st,const kiss_fft_s32_cpx * fin,kiss_fft_s32_cpx * fout,int in_stride)395 kiss_fft_s32_stride (kiss_fft_s32_cfg st, const kiss_fft_s32_cpx * fin,
396     kiss_fft_s32_cpx * fout, int in_stride)
397 {
398   if (fin == fout) {
399     //NOTE: this is not really an in-place FFT algorithm.
400     //It just performs an out-of-place FFT into a temp buffer
401     kiss_fft_s32_cpx *tmpbuf =
402         (kiss_fft_s32_cpx *) KISS_FFT_S32_TMP_ALLOC (sizeof (kiss_fft_s32_cpx) *
403         st->nfft);
404     kf_work (tmpbuf, fin, 1, in_stride, st->factors, st);
405     memcpy (fout, tmpbuf, sizeof (kiss_fft_s32_cpx) * st->nfft);
406     KISS_FFT_S32_TMP_FREE (tmpbuf);
407   } else {
408     kf_work (fout, fin, 1, in_stride, st->factors, st);
409   }
410 }
411 
412 void
kiss_fft_s32(kiss_fft_s32_cfg cfg,const kiss_fft_s32_cpx * fin,kiss_fft_s32_cpx * fout)413 kiss_fft_s32 (kiss_fft_s32_cfg cfg, const kiss_fft_s32_cpx * fin,
414     kiss_fft_s32_cpx * fout)
415 {
416   kiss_fft_s32_stride (cfg, fin, fout, 1);
417 }
418 
419 
420 void
kiss_fft_s32_cleanup(void)421 kiss_fft_s32_cleanup (void)
422 {
423   // nothing needed any more
424 }
425 
426 int
kiss_fft_s32_next_fast_size(int n)427 kiss_fft_s32_next_fast_size (int n)
428 {
429   while (1) {
430     int m = n;
431     while ((m % 2) == 0)
432       m /= 2;
433     while ((m % 3) == 0)
434       m /= 3;
435     while ((m % 5) == 0)
436       m /= 5;
437     if (m <= 1)
438       break;                    /* n is completely factorable by twos, threes, and fives */
439     n++;
440   }
441   return n;
442 }
443