• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  sp_fft.c  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 /*
21 ****************************************************************************
22 **
23 **  FILE:         sp_fft.cpp
24 **
25 **  CREATED:   11-September-99
26 **
27 **  DESCRIPTION:  Split-Radix FFT
28 **
29 **
30 **
31 **
32 **  MODIFICATIONS:
33 ** Revision history log
34 ** VSS revision history.  Do not edit by hand.
35 **
36 ** $NoKeywords: $
37 **
38 */
39 
40 #ifndef _RTT
41 #include <stdio.h>
42 #endif
43 #include <math.h>
44 #include <assert.h>
45 #include "front.h"
46 #include "portable.h"
47 
48 #include "sp_fft.h"
49 #include "himul32.h"
50 /*extern "C" asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2);*/
51 
52 /* >>>> Fixed Point, Floting Point, and Machine Specific Methods <<<<
53 
54   We will localize all fixed point, floating point, and machine specific
55   operations into the following methods so that in the main body of the code
56   we would not need to worry these issues.
57 */
58 
59 /* convert trigonomy function data to its required representation*/
60 static trigonomydata
to_trigonomydata(double a)61 to_trigonomydata(double a)
62 {
63   unsigned scale = (unsigned int)(1 << (8 * sizeof(trigonomydata) - 1));
64   return (trigonomydata)(a * scale);
65 }
66 
67 /* Do a sign-extending right shift of x by i bits, and
68  * round the result based on the leftmost bit shifted out.
69  * Must have 1 <= i < 32.
70  * Note that C doesn't define whether right shift of signed
71  * ints sign-extends or zero-fills.
72  * On platforms that do sign-extend, use the native right shift.
73  * Else use a short branch-free sequence that forces in copies
74  * of the sign bit.
75  */
rshift(asr_int32_t x,int i)76 static PINLINE asr_int32_t rshift(asr_int32_t x, int i)
77 {
78   asr_int32_t xshift = x >> i;
79   ASSERT(i >= 1 && i < 32);
80 
81 #if -1 >> 31 != -1
82   asr_int32_t signbit = (asr_int32_t)(((asr_uint32_t)x & 0x80000000U) >> i);
83   xshift |= -signbit;
84 #endif
85 
86   xshift += (x >> (i - 1)) & 1;
87   return xshift;
88 }
89 
90 
91 /*  compute (a + jb)*(c + jd) = a*c - b*d + j(ad + bc) */
complex_multiplier(trigonomydata a,trigonomydata b,fftdata c,fftdata d,fftdata * real,fftdata * imag)92 static PINLINE void complex_multiplier(trigonomydata a, trigonomydata b,
93                                        fftdata c,   fftdata d,
94                                        fftdata* real,  fftdata* imag)
95 {
96   /*
97       himul32(factor1, factor2) = floor( (factor1 * factor2) / 2**32 )
98       we need floor( (factor1 * factor2) / 2**31 )
99       retain one more bit of accuracy by left shifting first.
100   */
101   c <<= 1;
102   d <<= 1;
103   *real = himul32(a, c) - himul32(b, d);
104   *imag = himul32(a, d) + himul32(b, c);
105 }
106 
107 /* determine the maximum number of bits required to represent the data */
data_bits(const int length,fftdata data[])108 static PINLINE int data_bits(const int length, fftdata data[])
109 {
110   asr_uint32_t  bits = 0;
111   int     d    = 0;
112   int     i;
113 
114   ASSERT(sizeof(data[0]) == 4);
115 
116   for (i = 0; i < length; i++)
117   {
118     d = data[i];
119     bits |= (d > 0) ? d : -d;
120   }
121 
122   d = 0;
123   while (bits > 0)
124   {
125     bits >>= 1;
126     d++;
127   }
128 
129   return d;
130 }
131 
132 
133 /* >>>> Fixed Point, Floting Point, and Machine Independent Methods <<<< */
134 
135 /* constructor */
new_srfft(unsigned logLength)136 srfft* new_srfft(unsigned logLength)
137 {
138   srfft* pthis;
139 
140   /* cannot do smaller than 4 point FFT */
141   ASSERT(logLength >= 2);
142 
143   pthis              = (srfft*) CALLOC(1, sizeof(srfft), "srfft");
144   pthis->m_logLength = logLength;
145   pthis->m_length    = 1 << logLength;
146 
147   allocate_bitreverse_tbl(pthis);
148   allocate_butterfly_tbl(pthis);
149   allocate_trigonomy_tbl(pthis);
150 
151   return pthis;
152 }
153 
154 /* destructor */
delete_srfft(srfft * pSrfft)155 void delete_srfft(srfft* pSrfft)
156 {
157   FREE((char*)pSrfft->m_sin3Tbl);
158   FREE((char*)pSrfft->m_cos3Tbl);
159   FREE((char*)pSrfft->m_sin2Tbl);
160   FREE((char*)pSrfft->m_cos2Tbl);
161   FREE((char*)pSrfft->m_sin1Tbl);
162   FREE((char*)pSrfft->m_cos1Tbl);
163   FREE((char*)pSrfft->m_butterflyIndexTbl);
164   FREE((char*)pSrfft->m_bitreverseTbl);
165   FREE((char*)pSrfft);
166 }
167 
168 /*
169     allocate L shaped butterfly index lookup table
170 */
allocate_butterfly_tbl(srfft * pthis)171 void allocate_butterfly_tbl(srfft* pthis)
172 {
173   unsigned butterflyLength, butterflies, *butterflyIndex;
174   unsigned m, n, n2, i, j, i0, is, id, ii, ib;
175 
176 
177   /* compute total number of L shaped butterflies */
178   m = pthis->m_logLength;
179   butterflyLength = 0;
180   butterflies  = 0;
181   for (i = 0; i < m; i++)
182   {
183     butterflies = (i % 2) ? butterflyLength : butterflyLength + 1;
184     butterflyLength += butterflies;
185   }
186 
187   /*  Allocate m more spaces to store size information */
188   butterflyLength += m;
189   butterflyIndex = (unsigned*) CALLOC(butterflyLength, sizeof(unsigned), "srfft.butterflyIndex");
190 
191   /* Compute and store L shaped butterfly indexes at each stage */
192   n = pthis->m_length;
193   n2  = n << 1;
194   butterflyLength = 0;
195   ib = 0;
196   for (i = 0; i < m; i++)
197   {
198     butterflies = (i % 2) ? butterflyLength : butterflyLength + 1;
199     butterflyLength += butterflies;
200 
201     /* Store number of L butterflies at stage m-i*/
202     butterflyIndex[ib++] = butterflies;
203 
204     /* Compute Sorensen, Heideman, and Burrus indexes for L shaped butterfiles */
205     id = n2;
206     is = 0;
207     n2 = n2 >> 1;
208     while (is < n)
209     {
210       for (i0 = is; i0 < n; i0 += id)
211       {
212         butterflyIndex[ib] = i0;
213         if (i0 != 0)
214         {
215           /* sort bufferfly index in increasing order to simplify look up */
216           ii = ib;
217           while (butterflyIndex[ii] < butterflyIndex[ii-1])
218           {
219             j = butterflyIndex[ii];
220             butterflyIndex[ii] = butterflyIndex[ii-1];
221             butterflyIndex[--ii] = j;
222           }
223         }
224         ib++;
225       }
226       is = 2 * id - n2;
227       id = id << 2;
228     }
229   }
230   pthis->m_butterflyIndexTbl = butterflyIndex;
231 
232   /* move to stage 2 buffer index table */
233   for (i = 0; i < m - 2; i++)
234   {
235     butterflies = *butterflyIndex;
236     butterflyIndex += (butterflies + 1);
237   }
238 
239   /*
240       Since we want to compute four point butterflies directly,
241       when we compute two point butterflieswe at the last stage
242       we must bypass those two point butterflies that are decomposed
243       from previous stage's four point butterflies .
244   */
245   butterflies = *butterflyIndex++; /* number of four point butterflies */
246   ii = butterflies + 1;   /* index to the two point butterflies*/
247   for (i = 0; i < butterflies; i++)
248   {
249     j = butterflyIndex[i];
250 
251     /*
252         find those two point butterflies that are
253         decomposed from the four point butterflies
254     */
255     while (butterflyIndex[ii] != j) /* look up is sure so do not need worry over bound*/
256     {
257       ii++;
258     }
259     butterflyIndex[ii++] = 0;
260 
261     ASSERT(ii <= butterflyLength + m);
262   }
263 }
264 
265 
266 /*
267     Allocate trigonoy function lookup tables
268 */
allocate_trigonomy_tbl(srfft * pthis)269 void allocate_trigonomy_tbl(srfft* pthis)
270 {
271   trigonomydata *pcos1, *psin1, *pcos2, *psin2, *pcos3, *psin3;
272   unsigned  m, n, n2, n4, i, j, ii, nt;
273   double   e, radias, radias3;
274 
275   m  = pthis->m_logLength;
276   n  = pthis->m_length;
277   nt = (n >> 1) - 1;
278   pcos1 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
279   psin1 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
280   pcos2 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
281   psin2 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
282   pcos3 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
283   psin3 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
284 
285   ii = 0;
286   n2 = n << 1;
287   for (i = 0; i < m - 1; i++)
288   {
289     n2 = n2 >> 1;
290     n4 = n2 >> 2;
291     e = 6.283185307179586 / n2;
292 
293     for (j = 0; j < n4; j++)
294     {
295       if (j != 0) /* there is no need for radias zero trigonomy tables */
296       {
297         radias  = j * e;
298         radias3 = 3.0 * radias;
299 
300         pcos1[ii]   = to_trigonomydata(cos(radias));
301         psin1[ii]   = to_trigonomydata(sin(radias));
302         pcos3[ii]   = to_trigonomydata(cos(radias3));
303         psin3[ii]   = to_trigonomydata(sin(radias3));
304       }
305       ii++;
306     }
307   }
308 
309   for (i = 0; i < nt; i++)
310   {
311     radias = 3.141592653589793 * (i + 1) / n;
312 
313     pcos2[i]  = to_trigonomydata(cos(radias));
314     psin2[i]  = to_trigonomydata(sin(radias));
315   }
316 
317   pthis->m_cos1Tbl = pcos1;
318   pthis->m_sin1Tbl = psin1;
319   pthis->m_cos2Tbl = pcos2;
320   pthis->m_sin2Tbl = psin2;
321   pthis->m_cos3Tbl = pcos3;
322   pthis->m_sin3Tbl = psin3;
323 
324 }
325 
326 /*
327     Allocate bit reverse tables
328 */
allocate_bitreverse_tbl(srfft * pthis)329 void allocate_bitreverse_tbl(srfft* pthis)
330 {
331   unsigned forward, reverse, *tbl;
332   unsigned m, n, i, j;
333 
334   n  = pthis->m_length;
335   tbl = (unsigned*) CALLOC(n, sizeof(unsigned), "srfft.bitreverseTbl");
336   for (j = 0; j < n; j++) tbl[j] = 0;
337 
338   m  = pthis->m_logLength;
339   forward = 1;
340   reverse = n >> 1;
341   for (i = 0; i < m; i++)
342   {
343     for (j = 0; j < n; j++)
344     {
345       if (forward & j) tbl[j] |= reverse;
346     }
347     reverse >>= 1;
348     forward <<= 1;
349   }
350 
351   pthis->m_bitreverseTbl = tbl;
352 }
353 
354 
355 /*
356     Compute a four point FFT that requires no multiplications
357 */
four_point_fft1(fftdata * data)358 static PINLINE void four_point_fft1(fftdata* data)
359 {
360   fftdata r0, r1, r2;
361 
362   r0  = data[0];
363   r1  = data[4];
364   data[0] = r0 + r1;
365   data[4] = r0 - r1;
366 
367   r0  = data[2];
368   r1  = data[6];
369   data[2] = r0 + r1;
370   data[6] = r0 - r1;
371 
372   r0  = data[1];
373   r1  = data[5];
374   data[1] = r0 + r1;
375   data[5] = r0 - r1;
376 
377   r0  = data[3];
378   r1  = data[7];
379   data[3] = r0 + r1;
380   data[7] = r0 - r1;
381 
382   r0 = data[0];
383   r1 = data[2];
384   data[0] = r0 + r1;
385   data[2] = r0 - r1;
386 
387   r0 = data[1];
388   r1 = data[3];
389   data[1] = r0 + r1;
390   data[3] = r0 - r1;
391 
392   r0 = data[4];
393   r1 = data[7];
394   r2 = data[6];
395   data[4] = r0 + r1;
396   data[6] = r0 - r1;
397 
398   r0 = data[5];
399   data[5] = r0 - r2;
400   data[7] = r0 + r2;
401 }
402 
403 /*
404     Compute a two point FFT that requires no multiplications
405 */
two_point_fft1(fftdata * data)406 static PINLINE void two_point_fft1(fftdata* data)
407 {
408   fftdata r0, r1;
409 
410   r0 = data[0];
411   r1 = data[2];
412   data[0] = r0 + r1;
413   data[2] = r0 - r1;
414 
415   r0 = data[1];
416   r1 = data[3];
417   data[1] = r0 + r1;
418   data[3] = r0 - r1;
419 }
420 
421 
comp_L_butterfly1(unsigned butteflyIndex,unsigned quarterLength,trigonomydata cc1,trigonomydata ss1,trigonomydata cc3,trigonomydata ss3,fftdata * data)422 static PINLINE void comp_L_butterfly1(unsigned butteflyIndex, unsigned quarterLength,
423                                       trigonomydata  cc1,  trigonomydata ss1,
424                                       trigonomydata    cc3,  trigonomydata ss3,
425                                       fftdata* data)
426 {
427   unsigned k1, k2, k3;
428   fftdata  r0, r1, r2, r3, i0, i1, i2, i3;
429 
430   quarterLength <<= 1;
431 
432   k1 = quarterLength;
433   k2 = k1 + quarterLength;
434   k3 = k2 + quarterLength;
435 
436   r0 = data[0];
437   r1 = data[k1];
438   r2 = data[k2];
439   r3 = data[k3];
440   i0 = data[1];
441   i1 = data[k1+1];
442   i2 = data[k2+1];
443   i3 = data[k3+1];
444 
445   /* compute the radix-2 butterfly */
446   data[0]    = r0 + r2;
447   data[k1]   = r1 + r3;
448   data[1]    = i0 + i2;
449   data[k1+1] = i1 + i3;
450 
451   /* compute two radix-4 butterflies with twiddle factors */
452   r0 -= r2;
453   r1 -= r3;
454   i0 -= i2;
455   i1 -= i3;
456 
457   r2 = r0 + i1;
458   i2 = r1 - i0;
459   r3 = r0 - i1;
460   i3 = r1 + i0;
461 
462   /*
463       optimize the butterfly computation for zero's power twiddle factor
464       that does not need multimplications
465   */
466   if (butteflyIndex == 0)
467   {
468     data[k2] = r2;
469     data[k2+1] = -i2;
470     data[k3] = r3;
471     data[k3+1] = i3;
472   }
473   else
474   {
475     complex_multiplier(cc1, -ss1, r2, -i2, data + k2, data + k2 + 1);
476     complex_multiplier(cc3, -ss3, r3, i3,  data + k3, data + k3 + 1);
477   }
478 }
479 
480 /**********************************************************************/
do_fft1(srfft * pthis,unsigned length2,fftdata * data)481 void do_fft1(srfft* pthis, unsigned length2, fftdata* data)
482 {
483   unsigned  *indexTbl, indexLength;
484   trigonomydata *cos1, *sin1, *cos3, *sin3;
485   trigonomydata cc1, ss1, cc3, ss3;
486   unsigned  n, m, n4, i, j, k, ii, k0;
487   fftdata   temp;
488 
489   /* Load butterfly index table */
490   indexTbl = pthis->m_butterflyIndexTbl;
491   indexLength = 0;
492 
493   /* Load cosine and sine tables */
494   cos1 = pthis->m_cos1Tbl;
495   sin1 = pthis->m_sin1Tbl;
496   cos3 = pthis->m_cos3Tbl;
497   sin3 = pthis->m_sin3Tbl;
498 
499   /* stages of butterfly computation*/
500   n = pthis->m_length;
501   m = pthis->m_logLength;
502 
503 
504   /*
505       compute L shaped butterfies util only 4 and 2 point
506       butterfiles are left
507   */
508   n4 = n >> 1;
509   ii = 0;
510   for (i = 0; i < m - 2; i++)
511   {
512     n4 >>= 1;
513 
514     /* read the number of L shaped butterfly nodes at the stage */
515     indexLength = *indexTbl++;
516 
517     /*
518         compute one L shaped butterflies at each stage
519         j (time) and k (frequency) loops are reversed to minimize
520         trigonomy table lookups
521     */
522     for (j = 0; j < n4; j++)
523     {
524       cc1 = cos1[ii];
525       ss1 = sin1[ii];
526       cc3 = cos3[ii];
527       ss3 = sin3[ii++];
528       for (k = 0; k < indexLength; k++)
529       {
530         k0 = indexTbl[k] + j;
531         k0 <<= 1;
532         comp_L_butterfly1(j, n4, cc1, ss1, cc3, ss3, data + k0);
533       }
534     }
535 
536     /* Move to the butterfly index table of the next stage*/
537     indexTbl += indexLength;
538   }
539 
540   /* Compute 4 point butterflies at stage 2 */
541   indexLength = *indexTbl++;
542   for (k = 0; k < indexLength; k++)
543   {
544     k0 = indexTbl[k];
545     k0 <<= 1;
546     four_point_fft1(data + k0);
547   }
548   indexTbl += indexLength;
549 
550   /* Compute 2 point butterflies of the last stage */
551   indexLength = *indexTbl++;
552   for (k = 0; k < indexLength; k++)
553   {
554     k0 = indexTbl[k];
555     k0 <<= 1;
556 
557     /* k0 = 0 implies these nodes have been computed */
558     if (k0 != 0)
559     {
560       two_point_fft1(data + k0);
561     }
562   }
563 
564   /* Bit reverse the data array */
565   indexTbl = pthis->m_bitreverseTbl;
566   for (i = 0; i < n; i++)
567   {
568     ii = indexTbl[i];
569     if (i < ii)
570     {
571       j = i << 1;
572       k = ii << 1;
573       temp = data[j];
574       data[j] = data[k];
575       data[k] = temp;
576 
577       j++;
578       k++;
579       temp = data[j];
580       data[j] = data[k];
581       data[k] = temp;
582     }
583   }
584 }
585 
do_real_fft(srfft * pthis,unsigned n,fftdata * data)586 void do_real_fft(srfft* pthis, unsigned n, fftdata* data)
587 {
588   unsigned  n2;
589   unsigned  i, i1, i2, i3, i4;
590   fftdata   h1r, h1i, h2r, h2i, tr, ti;
591   trigonomydata *cos2, *sin2;
592 
593   cos2  = pthis->m_cos2Tbl;
594   sin2  = pthis->m_sin2Tbl;
595 
596   /* do a complex FFT of half size using the even indexed data
597   ** as real component and odd indexed data as imaginary data components
598   */
599 
600   do_fft1(pthis, n, data);
601 
602   /*
603   **  queeze the real valued first and last component of
604   **  the complex transform as elements data[0] and data[1]
605   */
606   tr = data[0];
607   ti = data[1];
608   data[0] = (tr + ti);
609   data[1] = (tr - ti);
610 
611   /* do the rest of elements*/
612   n2  = n >> 2;
613   for (i = 1; i < n2; i++)
614   {
615     i1 = i << 1;
616     i2 = i1 + 1;
617     i3 = n - i1;
618     i4 = i3 + 1;
619 
620     h1r = (data[i1] + data[i3]) / 2;
621     h1i = (data[i2] - data[i4]) / 2;
622     h2r = (data[i2] + data[i4]) / 2;
623     h2i = -(data[i1] - data[i3]) / 2;
624 
625     complex_multiplier(cos2[i-1], -sin2[i-1], h2r, h2i, &tr, &ti);
626 
627     data[i1] = h1r + tr;
628     data[i2] = h1i + ti;
629     data[i3] = h1r - tr;
630     data[i4] = -h1i + ti;
631   }
632   /* center one needs no multiplication, but has to reverse sign */
633   i = (n >> 1);
634   data[i+1] = -data[i+1];
635 
636 }
637 
638 /*****************************************************************************/
639 
do_real_fft_magsq(srfft * pthis,unsigned n,fftdata * data)640 int do_real_fft_magsq(srfft* pthis, unsigned n, fftdata* data)
641 {
642   fftdata tr, ti, last;
643   unsigned i, ii, n1;
644   int  scale    = 0;
645   int  s        = 0;
646   unsigned maxval   = 0;
647 
648 
649   /*
650   **   Windowed fftdata has an unknown data length - determine this using
651   **   data_bits(), a maximum of:
652   **
653   **   fixed data = windowedData * 2**HAMMING_DATA_BITS
654   **
655   **   FFT will grow data log2Length. In order to avoid data overflow,
656   **   we can scale data by a factor
657   **
658   **   scale = 8*sizeof(fftdata) - data_bits() - log2Length
659   **
660   **   In other words, we now have
661   **
662   **   fixed data = windowedData * 2**HAMMING_DATA_BITS * 2**scale
663   **
664   */
665 
666 
667   scale = 8 * sizeof(fftdata) - 2 - pthis->m_logLength;
668   scale -= data_bits(n, data);
669 
670   for (i = 0; i < n; i++)
671   {
672     if (scale < 0)
673     {
674       data[i] = rshift(data[i], -scale);
675     }
676     else
677     {
678       data[i] <<= scale;
679     }
680   }
681 
682   /* compute the real input fft,  the real valued first and last component of
683   ** the complex transform is stored as elements data[0] and data[1]
684   */
685 
686   do_real_fft(pthis, n, data);
687 
688   /*  After fft, we now have the data,
689   **
690   **  fixed data = fftdata * 2**HAMMING_DATA_BITS * 2**scale
691   **
692   **  to get fft data, we then need to reverse-shift the fixed data by the
693   **  scale constant;
694   **
695   **  However, since our purpose is to compute magnitude, we can combine
696   **  this step into the magnitude computation. Notice that
697   **
698   **  fixed data = fftdata * 2**(8*sizeof(fftdata) - DATA_BITS - log2Length)
699   **
700   **  if we use himul32 to compute the magnitude, which gives us,
701   **
702   **  fixed magnitude = fftdata magnitude * 2**(2*(32 - 16 - log2Length)) - 2**32)
703   **                  = fftdata magnitude * 2**(-2*log2Length)
704   **
705   **  to get the fixed magnitude = fftdata magnitude * 2**(-log2Length-1)
706   **                             = fftdata magnitude/FFT length
707   **  we need to upscale fftdata to cancel out the log2Lenght-1 factor in
708   **  the fixed magnitude
709   **
710   **  Notice that upshift scale log2Lenght-1 is not a constant, but a
711   **  function of FFT length.
712   **  Funthermore, even and odd log2Length-1 must be handled differently.
713   **
714   */
715 
716   /*
717   **  This bit is a lot simpler now, we just aim to get the pre-magsqu
718   **  values in a 30-bit range + sign.
719   **  This is the max val if we want r*r+i*i guarenteed < signed int64 range.
720   **  So shift the data up until it is ==30 bits (FFTDATA_SIZE-2)
721   */
722 
723   s = (FFTDATA_SIZE - 2) - data_bits(n, data);
724   /* n is twice the size, so this */
725 
726 
727   for (i = 0; i < n; i++)
728   {
729     if (s < 0)
730     {
731       data[i] = rshift(data[i], -s);
732     }
733     else
734     {
735       data[i] <<= s;
736     }
737   }
738 
739   scale += s;
740 
741   /*
742   **  OK, now we are in the 30bit range, we can do a magsq.
743   **  This magsq output must be less that 60bit plus sign.
744   */
745 
746   /*
747   **  Special at start as DC and Nyquist freqs are in data[0] and data[1]
748   **  respectively.
749   */
750 
751   tr = data[0];
752   data[0] = himul32(tr, tr);
753   maxval |= data[0];
754 
755   tr = data[1];
756   last = himul32(tr, tr);
757   maxval |= last;
758 
759   n1 = n >> 1;
760   for (i = 1; i < n1; i++)
761   {
762     ii = i << 1;
763     tr = data[ii];
764     data[i] = himul32(tr, tr);
765 
766     ti = data[++ii];
767     data[i] += himul32(ti, ti);
768 
769     maxval |= data[i];
770   }
771 
772   data[n1] = last; /* now the Nyquist freq can be put in place */
773 
774   /*
775   **  computing magnitude _squared_ means the scale is effectively
776   **  applied twice
777   */
778 
779   scale *= 2;
780 
781   /*
782   **  account for inherent scale of fft - we have do to this here as each
783   **  stage scales by sqrt(2), and we couldn't add this to scale until
784   **  after it had been multiplied by two (??)
785   **  TODO: The truth is we got here by trial and error
786   **   This should be checked.
787   */
788 
789   scale += pthis->m_logLength + 1;
790 
791   /*
792   **  doing the himul32() shift results in shifting down by 32(FFTDATA_SIZE) bits.
793   */
794 
795   scale -= 32;
796 
797   ASSERT(maxval >= 0);
798   ASSERT(!(maxval & 0xC0000000));
799   /* we've done something wrong if */
800   /* either of the top two bits  */
801   /* get used!    */
802 
803   return(-scale);  /* return the amount we have shifted the */
804   /* data _down_ by    */
805 
806 }
807 
808 
809 /*****************************************************************************/
810 
configure_fft(fft_info * fft,int size)811 void configure_fft(fft_info *fft, int size)
812 {
813   unsigned int log2Length, length;
814 
815   log2Length = 0;
816   length = size / 2;
817   while (length > 1)
818   {
819     length = length >> 1;
820     log2Length++;
821   }
822 
823   ASSERT(size == 1 << (log2Length + 1));
824   fft->size2 = size;
825   fft->size = size / 2;
826 
827   fft->m_srfft = new_srfft(log2Length);
828   fft->real = (fftdata*) CALLOC(size + 2, sizeof(fftdata), "srfft.fft_data");
829   fft->imag = fft->real + size / 2 + 1;
830 }
831 
fft_perform_and_magsq(fft_info * fft)832 int fft_perform_and_magsq(fft_info *fft)
833 {
834   unsigned n = fft->size2;
835   fftdata     *real = fft->real;
836   srfft       *pSrfft = fft->m_srfft;
837   ;
838 
839   return do_real_fft_magsq(pSrfft, n, real);
840 }
841 
unconfigure_fft(fft_info * fft)842 void unconfigure_fft(fft_info *fft)
843 {
844   ASSERT(fft);
845   delete_srfft(fft->m_srfft);
846   FREE((char*)fft->real);
847 }
848 
849 
place_sample_data(fft_info * fft,fftdata * seq,fftdata * smooth,int num)850 int place_sample_data(fft_info *fft, fftdata *seq, fftdata *smooth, int num)
851 {
852   int ii, size2;
853   srfft * pSrfft;
854 
855   pSrfft = fft->m_srfft;
856 
857   ASSERT(fft);
858   ASSERT(seq);
859   ASSERT(smooth);
860   ASSERT(num <= (int)fft->size2);
861   size2 = fft->size2;
862 
863   for (ii = 0; ii < num; ii++)
864   {
865     fft->real[ii] = (fftdata)(seq[ii] * smooth[ii]);
866   }
867 
868   for (; ii < size2; ii++)
869   {
870     fft->real[ii] = 0;
871   }
872 
873   return(-(HALF_FFTDATA_SIZE - 1));
874 }
875 
876