1 /*---------------------------------------------------------------------------*
2 * sp_fft.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20 /*
21 ****************************************************************************
22 **
23 ** FILE: sp_fft.cpp
24 **
25 ** CREATED: 11-September-99
26 **
27 ** DESCRIPTION: Split-Radix FFT
28 **
29 **
30 **
31 **
32 ** MODIFICATIONS:
33 ** Revision history log
34 ** VSS revision history. Do not edit by hand.
35 **
36 ** $NoKeywords: $
37 **
38 */
39
40 #ifndef _RTT
41 #include <stdio.h>
42 #endif
43 #include <math.h>
44 #include <assert.h>
45 #include "front.h"
46 #include "portable.h"
47
48 #include "sp_fft.h"
49 #include "himul32.h"
50 /*extern "C" asr_int32_t himul32(asr_int32_t factor1, asr_int32_t factor2);*/
51
52 /* >>>> Fixed Point, Floting Point, and Machine Specific Methods <<<<
53
54 We will localize all fixed point, floating point, and machine specific
55 operations into the following methods so that in the main body of the code
56 we would not need to worry these issues.
57 */
58
59 /* convert trigonomy function data to its required representation*/
60 static trigonomydata
to_trigonomydata(double a)61 to_trigonomydata(double a)
62 {
63 unsigned scale = (unsigned int)(1 << (8 * sizeof(trigonomydata) - 1));
64 return (trigonomydata)(a * scale);
65 }
66
67 /* Do a sign-extending right shift of x by i bits, and
68 * round the result based on the leftmost bit shifted out.
69 * Must have 1 <= i < 32.
70 * Note that C doesn't define whether right shift of signed
71 * ints sign-extends or zero-fills.
72 * On platforms that do sign-extend, use the native right shift.
73 * Else use a short branch-free sequence that forces in copies
74 * of the sign bit.
75 */
rshift(asr_int32_t x,int i)76 static PINLINE asr_int32_t rshift(asr_int32_t x, int i)
77 {
78 asr_int32_t xshift = x >> i;
79 ASSERT(i >= 1 && i < 32);
80
81 #if -1 >> 31 != -1
82 asr_int32_t signbit = (asr_int32_t)(((asr_uint32_t)x & 0x80000000U) >> i);
83 xshift |= -signbit;
84 #endif
85
86 xshift += (x >> (i - 1)) & 1;
87 return xshift;
88 }
89
90
91 /* compute (a + jb)*(c + jd) = a*c - b*d + j(ad + bc) */
complex_multiplier(trigonomydata a,trigonomydata b,fftdata c,fftdata d,fftdata * real,fftdata * imag)92 static PINLINE void complex_multiplier(trigonomydata a, trigonomydata b,
93 fftdata c, fftdata d,
94 fftdata* real, fftdata* imag)
95 {
96 /*
97 himul32(factor1, factor2) = floor( (factor1 * factor2) / 2**32 )
98 we need floor( (factor1 * factor2) / 2**31 )
99 retain one more bit of accuracy by left shifting first.
100 */
101 c <<= 1;
102 d <<= 1;
103 *real = himul32(a, c) - himul32(b, d);
104 *imag = himul32(a, d) + himul32(b, c);
105 }
106
107 /* determine the maximum number of bits required to represent the data */
data_bits(const int length,fftdata data[])108 static PINLINE int data_bits(const int length, fftdata data[])
109 {
110 asr_uint32_t bits = 0;
111 int d = 0;
112 int i;
113
114 ASSERT(sizeof(data[0]) == 4);
115
116 for (i = 0; i < length; i++)
117 {
118 d = data[i];
119 bits |= (d > 0) ? d : -d;
120 }
121
122 d = 0;
123 while (bits > 0)
124 {
125 bits >>= 1;
126 d++;
127 }
128
129 return d;
130 }
131
132
133 /* >>>> Fixed Point, Floting Point, and Machine Independent Methods <<<< */
134
135 /* constructor */
new_srfft(unsigned logLength)136 srfft* new_srfft(unsigned logLength)
137 {
138 srfft* pthis;
139
140 /* cannot do smaller than 4 point FFT */
141 ASSERT(logLength >= 2);
142
143 pthis = (srfft*) CALLOC(1, sizeof(srfft), "srfft");
144 pthis->m_logLength = logLength;
145 pthis->m_length = 1 << logLength;
146
147 allocate_bitreverse_tbl(pthis);
148 allocate_butterfly_tbl(pthis);
149 allocate_trigonomy_tbl(pthis);
150
151 return pthis;
152 }
153
154 /* destructor */
delete_srfft(srfft * pSrfft)155 void delete_srfft(srfft* pSrfft)
156 {
157 FREE((char*)pSrfft->m_sin3Tbl);
158 FREE((char*)pSrfft->m_cos3Tbl);
159 FREE((char*)pSrfft->m_sin2Tbl);
160 FREE((char*)pSrfft->m_cos2Tbl);
161 FREE((char*)pSrfft->m_sin1Tbl);
162 FREE((char*)pSrfft->m_cos1Tbl);
163 FREE((char*)pSrfft->m_butterflyIndexTbl);
164 FREE((char*)pSrfft->m_bitreverseTbl);
165 FREE((char*)pSrfft);
166 }
167
168 /*
169 allocate L shaped butterfly index lookup table
170 */
allocate_butterfly_tbl(srfft * pthis)171 void allocate_butterfly_tbl(srfft* pthis)
172 {
173 unsigned butterflyLength, butterflies, *butterflyIndex;
174 unsigned m, n, n2, i, j, i0, is, id, ii, ib;
175
176
177 /* compute total number of L shaped butterflies */
178 m = pthis->m_logLength;
179 butterflyLength = 0;
180 butterflies = 0;
181 for (i = 0; i < m; i++)
182 {
183 butterflies = (i % 2) ? butterflyLength : butterflyLength + 1;
184 butterflyLength += butterflies;
185 }
186
187 /* Allocate m more spaces to store size information */
188 butterflyLength += m;
189 butterflyIndex = (unsigned*) CALLOC(butterflyLength, sizeof(unsigned), "srfft.butterflyIndex");
190
191 /* Compute and store L shaped butterfly indexes at each stage */
192 n = pthis->m_length;
193 n2 = n << 1;
194 butterflyLength = 0;
195 ib = 0;
196 for (i = 0; i < m; i++)
197 {
198 butterflies = (i % 2) ? butterflyLength : butterflyLength + 1;
199 butterflyLength += butterflies;
200
201 /* Store number of L butterflies at stage m-i*/
202 butterflyIndex[ib++] = butterflies;
203
204 /* Compute Sorensen, Heideman, and Burrus indexes for L shaped butterfiles */
205 id = n2;
206 is = 0;
207 n2 = n2 >> 1;
208 while (is < n)
209 {
210 for (i0 = is; i0 < n; i0 += id)
211 {
212 butterflyIndex[ib] = i0;
213 if (i0 != 0)
214 {
215 /* sort bufferfly index in increasing order to simplify look up */
216 ii = ib;
217 while (butterflyIndex[ii] < butterflyIndex[ii-1])
218 {
219 j = butterflyIndex[ii];
220 butterflyIndex[ii] = butterflyIndex[ii-1];
221 butterflyIndex[--ii] = j;
222 }
223 }
224 ib++;
225 }
226 is = 2 * id - n2;
227 id = id << 2;
228 }
229 }
230 pthis->m_butterflyIndexTbl = butterflyIndex;
231
232 /* move to stage 2 buffer index table */
233 for (i = 0; i < m - 2; i++)
234 {
235 butterflies = *butterflyIndex;
236 butterflyIndex += (butterflies + 1);
237 }
238
239 /*
240 Since we want to compute four point butterflies directly,
241 when we compute two point butterflieswe at the last stage
242 we must bypass those two point butterflies that are decomposed
243 from previous stage's four point butterflies .
244 */
245 butterflies = *butterflyIndex++; /* number of four point butterflies */
246 ii = butterflies + 1; /* index to the two point butterflies*/
247 for (i = 0; i < butterflies; i++)
248 {
249 j = butterflyIndex[i];
250
251 /*
252 find those two point butterflies that are
253 decomposed from the four point butterflies
254 */
255 while (butterflyIndex[ii] != j) /* look up is sure so do not need worry over bound*/
256 {
257 ii++;
258 }
259 butterflyIndex[ii++] = 0;
260
261 ASSERT(ii <= butterflyLength + m);
262 }
263 }
264
265
266 /*
267 Allocate trigonoy function lookup tables
268 */
allocate_trigonomy_tbl(srfft * pthis)269 void allocate_trigonomy_tbl(srfft* pthis)
270 {
271 trigonomydata *pcos1, *psin1, *pcos2, *psin2, *pcos3, *psin3;
272 unsigned m, n, n2, n4, i, j, ii, nt;
273 double e, radias, radias3;
274
275 m = pthis->m_logLength;
276 n = pthis->m_length;
277 nt = (n >> 1) - 1;
278 pcos1 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
279 psin1 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
280 pcos2 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
281 psin2 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
282 pcos3 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
283 psin3 = (trigonomydata*) CALLOC(nt, sizeof(trigonomydata), "srfft.trigonomydata");
284
285 ii = 0;
286 n2 = n << 1;
287 for (i = 0; i < m - 1; i++)
288 {
289 n2 = n2 >> 1;
290 n4 = n2 >> 2;
291 e = 6.283185307179586 / n2;
292
293 for (j = 0; j < n4; j++)
294 {
295 if (j != 0) /* there is no need for radias zero trigonomy tables */
296 {
297 radias = j * e;
298 radias3 = 3.0 * radias;
299
300 pcos1[ii] = to_trigonomydata(cos(radias));
301 psin1[ii] = to_trigonomydata(sin(radias));
302 pcos3[ii] = to_trigonomydata(cos(radias3));
303 psin3[ii] = to_trigonomydata(sin(radias3));
304 }
305 ii++;
306 }
307 }
308
309 for (i = 0; i < nt; i++)
310 {
311 radias = 3.141592653589793 * (i + 1) / n;
312
313 pcos2[i] = to_trigonomydata(cos(radias));
314 psin2[i] = to_trigonomydata(sin(radias));
315 }
316
317 pthis->m_cos1Tbl = pcos1;
318 pthis->m_sin1Tbl = psin1;
319 pthis->m_cos2Tbl = pcos2;
320 pthis->m_sin2Tbl = psin2;
321 pthis->m_cos3Tbl = pcos3;
322 pthis->m_sin3Tbl = psin3;
323
324 }
325
326 /*
327 Allocate bit reverse tables
328 */
allocate_bitreverse_tbl(srfft * pthis)329 void allocate_bitreverse_tbl(srfft* pthis)
330 {
331 unsigned forward, reverse, *tbl;
332 unsigned m, n, i, j;
333
334 n = pthis->m_length;
335 tbl = (unsigned*) CALLOC(n, sizeof(unsigned), "srfft.bitreverseTbl");
336 for (j = 0; j < n; j++) tbl[j] = 0;
337
338 m = pthis->m_logLength;
339 forward = 1;
340 reverse = n >> 1;
341 for (i = 0; i < m; i++)
342 {
343 for (j = 0; j < n; j++)
344 {
345 if (forward & j) tbl[j] |= reverse;
346 }
347 reverse >>= 1;
348 forward <<= 1;
349 }
350
351 pthis->m_bitreverseTbl = tbl;
352 }
353
354
355 /*
356 Compute a four point FFT that requires no multiplications
357 */
four_point_fft1(fftdata * data)358 static PINLINE void four_point_fft1(fftdata* data)
359 {
360 fftdata r0, r1, r2;
361
362 r0 = data[0];
363 r1 = data[4];
364 data[0] = r0 + r1;
365 data[4] = r0 - r1;
366
367 r0 = data[2];
368 r1 = data[6];
369 data[2] = r0 + r1;
370 data[6] = r0 - r1;
371
372 r0 = data[1];
373 r1 = data[5];
374 data[1] = r0 + r1;
375 data[5] = r0 - r1;
376
377 r0 = data[3];
378 r1 = data[7];
379 data[3] = r0 + r1;
380 data[7] = r0 - r1;
381
382 r0 = data[0];
383 r1 = data[2];
384 data[0] = r0 + r1;
385 data[2] = r0 - r1;
386
387 r0 = data[1];
388 r1 = data[3];
389 data[1] = r0 + r1;
390 data[3] = r0 - r1;
391
392 r0 = data[4];
393 r1 = data[7];
394 r2 = data[6];
395 data[4] = r0 + r1;
396 data[6] = r0 - r1;
397
398 r0 = data[5];
399 data[5] = r0 - r2;
400 data[7] = r0 + r2;
401 }
402
403 /*
404 Compute a two point FFT that requires no multiplications
405 */
two_point_fft1(fftdata * data)406 static PINLINE void two_point_fft1(fftdata* data)
407 {
408 fftdata r0, r1;
409
410 r0 = data[0];
411 r1 = data[2];
412 data[0] = r0 + r1;
413 data[2] = r0 - r1;
414
415 r0 = data[1];
416 r1 = data[3];
417 data[1] = r0 + r1;
418 data[3] = r0 - r1;
419 }
420
421
comp_L_butterfly1(unsigned butteflyIndex,unsigned quarterLength,trigonomydata cc1,trigonomydata ss1,trigonomydata cc3,trigonomydata ss3,fftdata * data)422 static PINLINE void comp_L_butterfly1(unsigned butteflyIndex, unsigned quarterLength,
423 trigonomydata cc1, trigonomydata ss1,
424 trigonomydata cc3, trigonomydata ss3,
425 fftdata* data)
426 {
427 unsigned k1, k2, k3;
428 fftdata r0, r1, r2, r3, i0, i1, i2, i3;
429
430 quarterLength <<= 1;
431
432 k1 = quarterLength;
433 k2 = k1 + quarterLength;
434 k3 = k2 + quarterLength;
435
436 r0 = data[0];
437 r1 = data[k1];
438 r2 = data[k2];
439 r3 = data[k3];
440 i0 = data[1];
441 i1 = data[k1+1];
442 i2 = data[k2+1];
443 i3 = data[k3+1];
444
445 /* compute the radix-2 butterfly */
446 data[0] = r0 + r2;
447 data[k1] = r1 + r3;
448 data[1] = i0 + i2;
449 data[k1+1] = i1 + i3;
450
451 /* compute two radix-4 butterflies with twiddle factors */
452 r0 -= r2;
453 r1 -= r3;
454 i0 -= i2;
455 i1 -= i3;
456
457 r2 = r0 + i1;
458 i2 = r1 - i0;
459 r3 = r0 - i1;
460 i3 = r1 + i0;
461
462 /*
463 optimize the butterfly computation for zero's power twiddle factor
464 that does not need multimplications
465 */
466 if (butteflyIndex == 0)
467 {
468 data[k2] = r2;
469 data[k2+1] = -i2;
470 data[k3] = r3;
471 data[k3+1] = i3;
472 }
473 else
474 {
475 complex_multiplier(cc1, -ss1, r2, -i2, data + k2, data + k2 + 1);
476 complex_multiplier(cc3, -ss3, r3, i3, data + k3, data + k3 + 1);
477 }
478 }
479
480 /**********************************************************************/
do_fft1(srfft * pthis,unsigned length2,fftdata * data)481 void do_fft1(srfft* pthis, unsigned length2, fftdata* data)
482 {
483 unsigned *indexTbl, indexLength;
484 trigonomydata *cos1, *sin1, *cos3, *sin3;
485 trigonomydata cc1, ss1, cc3, ss3;
486 unsigned n, m, n4, i, j, k, ii, k0;
487 fftdata temp;
488
489 /* Load butterfly index table */
490 indexTbl = pthis->m_butterflyIndexTbl;
491 indexLength = 0;
492
493 /* Load cosine and sine tables */
494 cos1 = pthis->m_cos1Tbl;
495 sin1 = pthis->m_sin1Tbl;
496 cos3 = pthis->m_cos3Tbl;
497 sin3 = pthis->m_sin3Tbl;
498
499 /* stages of butterfly computation*/
500 n = pthis->m_length;
501 m = pthis->m_logLength;
502
503
504 /*
505 compute L shaped butterfies util only 4 and 2 point
506 butterfiles are left
507 */
508 n4 = n >> 1;
509 ii = 0;
510 for (i = 0; i < m - 2; i++)
511 {
512 n4 >>= 1;
513
514 /* read the number of L shaped butterfly nodes at the stage */
515 indexLength = *indexTbl++;
516
517 /*
518 compute one L shaped butterflies at each stage
519 j (time) and k (frequency) loops are reversed to minimize
520 trigonomy table lookups
521 */
522 for (j = 0; j < n4; j++)
523 {
524 cc1 = cos1[ii];
525 ss1 = sin1[ii];
526 cc3 = cos3[ii];
527 ss3 = sin3[ii++];
528 for (k = 0; k < indexLength; k++)
529 {
530 k0 = indexTbl[k] + j;
531 k0 <<= 1;
532 comp_L_butterfly1(j, n4, cc1, ss1, cc3, ss3, data + k0);
533 }
534 }
535
536 /* Move to the butterfly index table of the next stage*/
537 indexTbl += indexLength;
538 }
539
540 /* Compute 4 point butterflies at stage 2 */
541 indexLength = *indexTbl++;
542 for (k = 0; k < indexLength; k++)
543 {
544 k0 = indexTbl[k];
545 k0 <<= 1;
546 four_point_fft1(data + k0);
547 }
548 indexTbl += indexLength;
549
550 /* Compute 2 point butterflies of the last stage */
551 indexLength = *indexTbl++;
552 for (k = 0; k < indexLength; k++)
553 {
554 k0 = indexTbl[k];
555 k0 <<= 1;
556
557 /* k0 = 0 implies these nodes have been computed */
558 if (k0 != 0)
559 {
560 two_point_fft1(data + k0);
561 }
562 }
563
564 /* Bit reverse the data array */
565 indexTbl = pthis->m_bitreverseTbl;
566 for (i = 0; i < n; i++)
567 {
568 ii = indexTbl[i];
569 if (i < ii)
570 {
571 j = i << 1;
572 k = ii << 1;
573 temp = data[j];
574 data[j] = data[k];
575 data[k] = temp;
576
577 j++;
578 k++;
579 temp = data[j];
580 data[j] = data[k];
581 data[k] = temp;
582 }
583 }
584 }
585
do_real_fft(srfft * pthis,unsigned n,fftdata * data)586 void do_real_fft(srfft* pthis, unsigned n, fftdata* data)
587 {
588 unsigned n2;
589 unsigned i, i1, i2, i3, i4;
590 fftdata h1r, h1i, h2r, h2i, tr, ti;
591 trigonomydata *cos2, *sin2;
592
593 cos2 = pthis->m_cos2Tbl;
594 sin2 = pthis->m_sin2Tbl;
595
596 /* do a complex FFT of half size using the even indexed data
597 ** as real component and odd indexed data as imaginary data components
598 */
599
600 do_fft1(pthis, n, data);
601
602 /*
603 ** queeze the real valued first and last component of
604 ** the complex transform as elements data[0] and data[1]
605 */
606 tr = data[0];
607 ti = data[1];
608 data[0] = (tr + ti);
609 data[1] = (tr - ti);
610
611 /* do the rest of elements*/
612 n2 = n >> 2;
613 for (i = 1; i < n2; i++)
614 {
615 i1 = i << 1;
616 i2 = i1 + 1;
617 i3 = n - i1;
618 i4 = i3 + 1;
619
620 h1r = (data[i1] + data[i3]) / 2;
621 h1i = (data[i2] - data[i4]) / 2;
622 h2r = (data[i2] + data[i4]) / 2;
623 h2i = -(data[i1] - data[i3]) / 2;
624
625 complex_multiplier(cos2[i-1], -sin2[i-1], h2r, h2i, &tr, &ti);
626
627 data[i1] = h1r + tr;
628 data[i2] = h1i + ti;
629 data[i3] = h1r - tr;
630 data[i4] = -h1i + ti;
631 }
632 /* center one needs no multiplication, but has to reverse sign */
633 i = (n >> 1);
634 data[i+1] = -data[i+1];
635
636 }
637
638 /*****************************************************************************/
639
do_real_fft_magsq(srfft * pthis,unsigned n,fftdata * data)640 int do_real_fft_magsq(srfft* pthis, unsigned n, fftdata* data)
641 {
642 fftdata tr, ti, last;
643 unsigned i, ii, n1;
644 int scale = 0;
645 int s = 0;
646 unsigned maxval = 0;
647
648
649 /*
650 ** Windowed fftdata has an unknown data length - determine this using
651 ** data_bits(), a maximum of:
652 **
653 ** fixed data = windowedData * 2**HAMMING_DATA_BITS
654 **
655 ** FFT will grow data log2Length. In order to avoid data overflow,
656 ** we can scale data by a factor
657 **
658 ** scale = 8*sizeof(fftdata) - data_bits() - log2Length
659 **
660 ** In other words, we now have
661 **
662 ** fixed data = windowedData * 2**HAMMING_DATA_BITS * 2**scale
663 **
664 */
665
666
667 scale = 8 * sizeof(fftdata) - 2 - pthis->m_logLength;
668 scale -= data_bits(n, data);
669
670 for (i = 0; i < n; i++)
671 {
672 if (scale < 0)
673 {
674 data[i] = rshift(data[i], -scale);
675 }
676 else
677 {
678 data[i] <<= scale;
679 }
680 }
681
682 /* compute the real input fft, the real valued first and last component of
683 ** the complex transform is stored as elements data[0] and data[1]
684 */
685
686 do_real_fft(pthis, n, data);
687
688 /* After fft, we now have the data,
689 **
690 ** fixed data = fftdata * 2**HAMMING_DATA_BITS * 2**scale
691 **
692 ** to get fft data, we then need to reverse-shift the fixed data by the
693 ** scale constant;
694 **
695 ** However, since our purpose is to compute magnitude, we can combine
696 ** this step into the magnitude computation. Notice that
697 **
698 ** fixed data = fftdata * 2**(8*sizeof(fftdata) - DATA_BITS - log2Length)
699 **
700 ** if we use himul32 to compute the magnitude, which gives us,
701 **
702 ** fixed magnitude = fftdata magnitude * 2**(2*(32 - 16 - log2Length)) - 2**32)
703 ** = fftdata magnitude * 2**(-2*log2Length)
704 **
705 ** to get the fixed magnitude = fftdata magnitude * 2**(-log2Length-1)
706 ** = fftdata magnitude/FFT length
707 ** we need to upscale fftdata to cancel out the log2Lenght-1 factor in
708 ** the fixed magnitude
709 **
710 ** Notice that upshift scale log2Lenght-1 is not a constant, but a
711 ** function of FFT length.
712 ** Funthermore, even and odd log2Length-1 must be handled differently.
713 **
714 */
715
716 /*
717 ** This bit is a lot simpler now, we just aim to get the pre-magsqu
718 ** values in a 30-bit range + sign.
719 ** This is the max val if we want r*r+i*i guarenteed < signed int64 range.
720 ** So shift the data up until it is ==30 bits (FFTDATA_SIZE-2)
721 */
722
723 s = (FFTDATA_SIZE - 2) - data_bits(n, data);
724 /* n is twice the size, so this */
725
726
727 for (i = 0; i < n; i++)
728 {
729 if (s < 0)
730 {
731 data[i] = rshift(data[i], -s);
732 }
733 else
734 {
735 data[i] <<= s;
736 }
737 }
738
739 scale += s;
740
741 /*
742 ** OK, now we are in the 30bit range, we can do a magsq.
743 ** This magsq output must be less that 60bit plus sign.
744 */
745
746 /*
747 ** Special at start as DC and Nyquist freqs are in data[0] and data[1]
748 ** respectively.
749 */
750
751 tr = data[0];
752 data[0] = himul32(tr, tr);
753 maxval |= data[0];
754
755 tr = data[1];
756 last = himul32(tr, tr);
757 maxval |= last;
758
759 n1 = n >> 1;
760 for (i = 1; i < n1; i++)
761 {
762 ii = i << 1;
763 tr = data[ii];
764 data[i] = himul32(tr, tr);
765
766 ti = data[++ii];
767 data[i] += himul32(ti, ti);
768
769 maxval |= data[i];
770 }
771
772 data[n1] = last; /* now the Nyquist freq can be put in place */
773
774 /*
775 ** computing magnitude _squared_ means the scale is effectively
776 ** applied twice
777 */
778
779 scale *= 2;
780
781 /*
782 ** account for inherent scale of fft - we have do to this here as each
783 ** stage scales by sqrt(2), and we couldn't add this to scale until
784 ** after it had been multiplied by two (??)
785 ** TODO: The truth is we got here by trial and error
786 ** This should be checked.
787 */
788
789 scale += pthis->m_logLength + 1;
790
791 /*
792 ** doing the himul32() shift results in shifting down by 32(FFTDATA_SIZE) bits.
793 */
794
795 scale -= 32;
796
797 ASSERT(maxval >= 0);
798 ASSERT(!(maxval & 0xC0000000));
799 /* we've done something wrong if */
800 /* either of the top two bits */
801 /* get used! */
802
803 return(-scale); /* return the amount we have shifted the */
804 /* data _down_ by */
805
806 }
807
808
809 /*****************************************************************************/
810
configure_fft(fft_info * fft,int size)811 void configure_fft(fft_info *fft, int size)
812 {
813 unsigned int log2Length, length;
814
815 log2Length = 0;
816 length = size / 2;
817 while (length > 1)
818 {
819 length = length >> 1;
820 log2Length++;
821 }
822
823 ASSERT(size == 1 << (log2Length + 1));
824 fft->size2 = size;
825 fft->size = size / 2;
826
827 fft->m_srfft = new_srfft(log2Length);
828 fft->real = (fftdata*) CALLOC(size + 2, sizeof(fftdata), "srfft.fft_data");
829 fft->imag = fft->real + size / 2 + 1;
830 }
831
fft_perform_and_magsq(fft_info * fft)832 int fft_perform_and_magsq(fft_info *fft)
833 {
834 unsigned n = fft->size2;
835 fftdata *real = fft->real;
836 srfft *pSrfft = fft->m_srfft;
837 ;
838
839 return do_real_fft_magsq(pSrfft, n, real);
840 }
841
unconfigure_fft(fft_info * fft)842 void unconfigure_fft(fft_info *fft)
843 {
844 ASSERT(fft);
845 delete_srfft(fft->m_srfft);
846 FREE((char*)fft->real);
847 }
848
849
place_sample_data(fft_info * fft,fftdata * seq,fftdata * smooth,int num)850 int place_sample_data(fft_info *fft, fftdata *seq, fftdata *smooth, int num)
851 {
852 int ii, size2;
853 srfft * pSrfft;
854
855 pSrfft = fft->m_srfft;
856
857 ASSERT(fft);
858 ASSERT(seq);
859 ASSERT(smooth);
860 ASSERT(num <= (int)fft->size2);
861 size2 = fft->size2;
862
863 for (ii = 0; ii < num; ii++)
864 {
865 fft->real[ii] = (fftdata)(seq[ii] * smooth[ii]);
866 }
867
868 for (; ii < size2; ii++)
869 {
870 fft->real[ii] = 0;
871 }
872
873 return(-(HALF_FFTDATA_SIZE - 1));
874 }
875
876