1 /********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
9 * by the XIPHOPHORUS Company http://www.xiph.org/ *
10 * *
11 ********************************************************************
12
13 function: *unnormalized* fft transform
14 last mod: $Id: smallft.c,v 1.19 2003/10/08 05:12:37 jm Exp $
15
16 ********************************************************************/
17
18 /* FFT implementation from OggSquish, minus cosine transforms,
19 * minus all but radix 2/4 case. In Vorbis we only need this
20 * cut-down version.
21 *
22 * To do more than just power-of-two sized vectors, see the full
23 * version I wrote for NetLib.
24 *
25 * Note that the packing is a little strange; rather than the FFT r/i
26 * packing following R_0, I_n, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1,
27 * it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
28 * FORTRAN version
29 */
30
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34
35 #include <math.h>
36 #include "smallft.h"
37 #include "arch.h"
38 #include "os_support.h"
39
drfti1(int n,float * wa,int * ifac)40 static void drfti1(int n, float *wa, int *ifac){
41 static int ntryh[4] = { 4,2,3,5 };
42 static float tpi = 6.28318530717958648f;
43 float arg,argh,argld,fi;
44 int ntry=0,i,j=-1;
45 int k1, l1, l2, ib;
46 int ld, ii, ip, is, nq, nr;
47 int ido, ipm, nfm1;
48 int nl=n;
49 int nf=0;
50
51 L101:
52 j++;
53 if (j < 4)
54 ntry=ntryh[j];
55 else
56 ntry+=2;
57
58 L104:
59 nq=nl/ntry;
60 nr=nl-ntry*nq;
61 if (nr!=0) goto L101;
62
63 nf++;
64 ifac[nf+1]=ntry;
65 nl=nq;
66 if(ntry!=2)goto L107;
67 if(nf==1)goto L107;
68
69 for (i=1;i<nf;i++){
70 ib=nf-i+1;
71 ifac[ib+1]=ifac[ib];
72 }
73 ifac[2] = 2;
74
75 L107:
76 if(nl!=1)goto L104;
77 ifac[0]=n;
78 ifac[1]=nf;
79 argh=tpi/n;
80 is=0;
81 nfm1=nf-1;
82 l1=1;
83
84 if(nfm1==0)return;
85
86 for (k1=0;k1<nfm1;k1++){
87 ip=ifac[k1+2];
88 ld=0;
89 l2=l1*ip;
90 ido=n/l2;
91 ipm=ip-1;
92
93 for (j=0;j<ipm;j++){
94 ld+=l1;
95 i=is;
96 argld=(float)ld*argh;
97 fi=0.f;
98 for (ii=2;ii<ido;ii+=2){
99 fi+=1.f;
100 arg=fi*argld;
101 wa[i++]=cos(arg);
102 wa[i++]=sin(arg);
103 }
104 is+=ido;
105 }
106 l1=l2;
107 }
108 }
109
fdrffti(int n,float * wsave,int * ifac)110 static void fdrffti(int n, float *wsave, int *ifac){
111
112 if (n == 1) return;
113 drfti1(n, wsave+n, ifac);
114 }
115
dradf2(int ido,int l1,float * cc,float * ch,float * wa1)116 static void dradf2(int ido,int l1,float *cc,float *ch,float *wa1){
117 int i,k;
118 float ti2,tr2;
119 int t0,t1,t2,t3,t4,t5,t6;
120
121 t1=0;
122 t0=(t2=l1*ido);
123 t3=ido<<1;
124 for(k=0;k<l1;k++){
125 ch[t1<<1]=cc[t1]+cc[t2];
126 ch[(t1<<1)+t3-1]=cc[t1]-cc[t2];
127 t1+=ido;
128 t2+=ido;
129 }
130
131 if(ido<2)return;
132 if(ido==2)goto L105;
133
134 t1=0;
135 t2=t0;
136 for(k=0;k<l1;k++){
137 t3=t2;
138 t4=(t1<<1)+(ido<<1);
139 t5=t1;
140 t6=t1+t1;
141 for(i=2;i<ido;i+=2){
142 t3+=2;
143 t4-=2;
144 t5+=2;
145 t6+=2;
146 tr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
147 ti2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
148 ch[t6]=cc[t5]+ti2;
149 ch[t4]=ti2-cc[t5];
150 ch[t6-1]=cc[t5-1]+tr2;
151 ch[t4-1]=cc[t5-1]-tr2;
152 }
153 t1+=ido;
154 t2+=ido;
155 }
156
157 if(ido%2==1)return;
158
159 L105:
160 t3=(t2=(t1=ido)-1);
161 t2+=t0;
162 for(k=0;k<l1;k++){
163 ch[t1]=-cc[t2];
164 ch[t1-1]=cc[t3];
165 t1+=ido<<1;
166 t2+=ido;
167 t3+=ido;
168 }
169 }
170
dradf4(int ido,int l1,float * cc,float * ch,float * wa1,float * wa2,float * wa3)171 static void dradf4(int ido,int l1,float *cc,float *ch,float *wa1,
172 float *wa2,float *wa3){
173 static float hsqt2 = .70710678118654752f;
174 int i,k,t0,t1,t2,t3,t4,t5,t6;
175 float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
176 t0=l1*ido;
177
178 t1=t0;
179 t4=t1<<1;
180 t2=t1+(t1<<1);
181 t3=0;
182
183 for(k=0;k<l1;k++){
184 tr1=cc[t1]+cc[t2];
185 tr2=cc[t3]+cc[t4];
186
187 ch[t5=t3<<2]=tr1+tr2;
188 ch[(ido<<2)+t5-1]=tr2-tr1;
189 ch[(t5+=(ido<<1))-1]=cc[t3]-cc[t4];
190 ch[t5]=cc[t2]-cc[t1];
191
192 t1+=ido;
193 t2+=ido;
194 t3+=ido;
195 t4+=ido;
196 }
197
198 if(ido<2)return;
199 if(ido==2)goto L105;
200
201
202 t1=0;
203 for(k=0;k<l1;k++){
204 t2=t1;
205 t4=t1<<2;
206 t5=(t6=ido<<1)+t4;
207 for(i=2;i<ido;i+=2){
208 t3=(t2+=2);
209 t4+=2;
210 t5-=2;
211
212 t3+=t0;
213 cr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
214 ci2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
215 t3+=t0;
216 cr3=wa2[i-2]*cc[t3-1]+wa2[i-1]*cc[t3];
217 ci3=wa2[i-2]*cc[t3]-wa2[i-1]*cc[t3-1];
218 t3+=t0;
219 cr4=wa3[i-2]*cc[t3-1]+wa3[i-1]*cc[t3];
220 ci4=wa3[i-2]*cc[t3]-wa3[i-1]*cc[t3-1];
221
222 tr1=cr2+cr4;
223 tr4=cr4-cr2;
224 ti1=ci2+ci4;
225 ti4=ci2-ci4;
226
227 ti2=cc[t2]+ci3;
228 ti3=cc[t2]-ci3;
229 tr2=cc[t2-1]+cr3;
230 tr3=cc[t2-1]-cr3;
231
232 ch[t4-1]=tr1+tr2;
233 ch[t4]=ti1+ti2;
234
235 ch[t5-1]=tr3-ti4;
236 ch[t5]=tr4-ti3;
237
238 ch[t4+t6-1]=ti4+tr3;
239 ch[t4+t6]=tr4+ti3;
240
241 ch[t5+t6-1]=tr2-tr1;
242 ch[t5+t6]=ti1-ti2;
243 }
244 t1+=ido;
245 }
246 if(ido&1)return;
247
248 L105:
249
250 t2=(t1=t0+ido-1)+(t0<<1);
251 t3=ido<<2;
252 t4=ido;
253 t5=ido<<1;
254 t6=ido;
255
256 for(k=0;k<l1;k++){
257 ti1=-hsqt2*(cc[t1]+cc[t2]);
258 tr1=hsqt2*(cc[t1]-cc[t2]);
259
260 ch[t4-1]=tr1+cc[t6-1];
261 ch[t4+t5-1]=cc[t6-1]-tr1;
262
263 ch[t4]=ti1-cc[t1+t0];
264 ch[t4+t5]=ti1+cc[t1+t0];
265
266 t1+=ido;
267 t2+=ido;
268 t4+=t3;
269 t6+=ido;
270 }
271 }
272
dradfg(int ido,int ip,int l1,int idl1,float * cc,float * c1,float * c2,float * ch,float * ch2,float * wa)273 static void dradfg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
274 float *c2,float *ch,float *ch2,float *wa){
275
276 static float tpi=6.283185307179586f;
277 int idij,ipph,i,j,k,l,ic,ik,is;
278 int t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
279 float dc2,ai1,ai2,ar1,ar2,ds2;
280 int nbd;
281 float dcp,arg,dsp,ar1h,ar2h;
282 int idp2,ipp2;
283
284 arg=tpi/(float)ip;
285 dcp=cos(arg);
286 dsp=sin(arg);
287 ipph=(ip+1)>>1;
288 ipp2=ip;
289 idp2=ido;
290 nbd=(ido-1)>>1;
291 t0=l1*ido;
292 t10=ip*ido;
293
294 if(ido==1)goto L119;
295 for(ik=0;ik<idl1;ik++)ch2[ik]=c2[ik];
296
297 t1=0;
298 for(j=1;j<ip;j++){
299 t1+=t0;
300 t2=t1;
301 for(k=0;k<l1;k++){
302 ch[t2]=c1[t2];
303 t2+=ido;
304 }
305 }
306
307 is=-ido;
308 t1=0;
309 if(nbd>l1){
310 for(j=1;j<ip;j++){
311 t1+=t0;
312 is+=ido;
313 t2= -ido+t1;
314 for(k=0;k<l1;k++){
315 idij=is-1;
316 t2+=ido;
317 t3=t2;
318 for(i=2;i<ido;i+=2){
319 idij+=2;
320 t3+=2;
321 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
322 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
323 }
324 }
325 }
326 }else{
327
328 for(j=1;j<ip;j++){
329 is+=ido;
330 idij=is-1;
331 t1+=t0;
332 t2=t1;
333 for(i=2;i<ido;i+=2){
334 idij+=2;
335 t2+=2;
336 t3=t2;
337 for(k=0;k<l1;k++){
338 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
339 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
340 t3+=ido;
341 }
342 }
343 }
344 }
345
346 t1=0;
347 t2=ipp2*t0;
348 if(nbd<l1){
349 for(j=1;j<ipph;j++){
350 t1+=t0;
351 t2-=t0;
352 t3=t1;
353 t4=t2;
354 for(i=2;i<ido;i+=2){
355 t3+=2;
356 t4+=2;
357 t5=t3-ido;
358 t6=t4-ido;
359 for(k=0;k<l1;k++){
360 t5+=ido;
361 t6+=ido;
362 c1[t5-1]=ch[t5-1]+ch[t6-1];
363 c1[t6-1]=ch[t5]-ch[t6];
364 c1[t5]=ch[t5]+ch[t6];
365 c1[t6]=ch[t6-1]-ch[t5-1];
366 }
367 }
368 }
369 }else{
370 for(j=1;j<ipph;j++){
371 t1+=t0;
372 t2-=t0;
373 t3=t1;
374 t4=t2;
375 for(k=0;k<l1;k++){
376 t5=t3;
377 t6=t4;
378 for(i=2;i<ido;i+=2){
379 t5+=2;
380 t6+=2;
381 c1[t5-1]=ch[t5-1]+ch[t6-1];
382 c1[t6-1]=ch[t5]-ch[t6];
383 c1[t5]=ch[t5]+ch[t6];
384 c1[t6]=ch[t6-1]-ch[t5-1];
385 }
386 t3+=ido;
387 t4+=ido;
388 }
389 }
390 }
391
392 L119:
393 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
394
395 t1=0;
396 t2=ipp2*idl1;
397 for(j=1;j<ipph;j++){
398 t1+=t0;
399 t2-=t0;
400 t3=t1-ido;
401 t4=t2-ido;
402 for(k=0;k<l1;k++){
403 t3+=ido;
404 t4+=ido;
405 c1[t3]=ch[t3]+ch[t4];
406 c1[t4]=ch[t4]-ch[t3];
407 }
408 }
409
410 ar1=1.f;
411 ai1=0.f;
412 t1=0;
413 t2=ipp2*idl1;
414 t3=(ip-1)*idl1;
415 for(l=1;l<ipph;l++){
416 t1+=idl1;
417 t2-=idl1;
418 ar1h=dcp*ar1-dsp*ai1;
419 ai1=dcp*ai1+dsp*ar1;
420 ar1=ar1h;
421 t4=t1;
422 t5=t2;
423 t6=t3;
424 t7=idl1;
425
426 for(ik=0;ik<idl1;ik++){
427 ch2[t4++]=c2[ik]+ar1*c2[t7++];
428 ch2[t5++]=ai1*c2[t6++];
429 }
430
431 dc2=ar1;
432 ds2=ai1;
433 ar2=ar1;
434 ai2=ai1;
435
436 t4=idl1;
437 t5=(ipp2-1)*idl1;
438 for(j=2;j<ipph;j++){
439 t4+=idl1;
440 t5-=idl1;
441
442 ar2h=dc2*ar2-ds2*ai2;
443 ai2=dc2*ai2+ds2*ar2;
444 ar2=ar2h;
445
446 t6=t1;
447 t7=t2;
448 t8=t4;
449 t9=t5;
450 for(ik=0;ik<idl1;ik++){
451 ch2[t6++]+=ar2*c2[t8++];
452 ch2[t7++]+=ai2*c2[t9++];
453 }
454 }
455 }
456
457 t1=0;
458 for(j=1;j<ipph;j++){
459 t1+=idl1;
460 t2=t1;
461 for(ik=0;ik<idl1;ik++)ch2[ik]+=c2[t2++];
462 }
463
464 if(ido<l1)goto L132;
465
466 t1=0;
467 t2=0;
468 for(k=0;k<l1;k++){
469 t3=t1;
470 t4=t2;
471 for(i=0;i<ido;i++)cc[t4++]=ch[t3++];
472 t1+=ido;
473 t2+=t10;
474 }
475
476 goto L135;
477
478 L132:
479 for(i=0;i<ido;i++){
480 t1=i;
481 t2=i;
482 for(k=0;k<l1;k++){
483 cc[t2]=ch[t1];
484 t1+=ido;
485 t2+=t10;
486 }
487 }
488
489 L135:
490 t1=0;
491 t2=ido<<1;
492 t3=0;
493 t4=ipp2*t0;
494 for(j=1;j<ipph;j++){
495
496 t1+=t2;
497 t3+=t0;
498 t4-=t0;
499
500 t5=t1;
501 t6=t3;
502 t7=t4;
503
504 for(k=0;k<l1;k++){
505 cc[t5-1]=ch[t6];
506 cc[t5]=ch[t7];
507 t5+=t10;
508 t6+=ido;
509 t7+=ido;
510 }
511 }
512
513 if(ido==1)return;
514 if(nbd<l1)goto L141;
515
516 t1=-ido;
517 t3=0;
518 t4=0;
519 t5=ipp2*t0;
520 for(j=1;j<ipph;j++){
521 t1+=t2;
522 t3+=t2;
523 t4+=t0;
524 t5-=t0;
525 t6=t1;
526 t7=t3;
527 t8=t4;
528 t9=t5;
529 for(k=0;k<l1;k++){
530 for(i=2;i<ido;i+=2){
531 ic=idp2-i;
532 cc[i+t7-1]=ch[i+t8-1]+ch[i+t9-1];
533 cc[ic+t6-1]=ch[i+t8-1]-ch[i+t9-1];
534 cc[i+t7]=ch[i+t8]+ch[i+t9];
535 cc[ic+t6]=ch[i+t9]-ch[i+t8];
536 }
537 t6+=t10;
538 t7+=t10;
539 t8+=ido;
540 t9+=ido;
541 }
542 }
543 return;
544
545 L141:
546
547 t1=-ido;
548 t3=0;
549 t4=0;
550 t5=ipp2*t0;
551 for(j=1;j<ipph;j++){
552 t1+=t2;
553 t3+=t2;
554 t4+=t0;
555 t5-=t0;
556 for(i=2;i<ido;i+=2){
557 t6=idp2+t1-i;
558 t7=i+t3;
559 t8=i+t4;
560 t9=i+t5;
561 for(k=0;k<l1;k++){
562 cc[t7-1]=ch[t8-1]+ch[t9-1];
563 cc[t6-1]=ch[t8-1]-ch[t9-1];
564 cc[t7]=ch[t8]+ch[t9];
565 cc[t6]=ch[t9]-ch[t8];
566 t6+=t10;
567 t7+=t10;
568 t8+=ido;
569 t9+=ido;
570 }
571 }
572 }
573 }
574
drftf1(int n,float * c,float * ch,float * wa,int * ifac)575 static void drftf1(int n,float *c,float *ch,float *wa,int *ifac){
576 int i,k1,l1,l2;
577 int na,kh,nf;
578 int ip,iw,ido,idl1,ix2,ix3;
579
580 nf=ifac[1];
581 na=1;
582 l2=n;
583 iw=n;
584
585 for(k1=0;k1<nf;k1++){
586 kh=nf-k1;
587 ip=ifac[kh+1];
588 l1=l2/ip;
589 ido=n/l2;
590 idl1=ido*l1;
591 iw-=(ip-1)*ido;
592 na=1-na;
593
594 if(ip!=4)goto L102;
595
596 ix2=iw+ido;
597 ix3=ix2+ido;
598 if(na!=0)
599 dradf4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
600 else
601 dradf4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
602 goto L110;
603
604 L102:
605 if(ip!=2)goto L104;
606 if(na!=0)goto L103;
607
608 dradf2(ido,l1,c,ch,wa+iw-1);
609 goto L110;
610
611 L103:
612 dradf2(ido,l1,ch,c,wa+iw-1);
613 goto L110;
614
615 L104:
616 if(ido==1)na=1-na;
617 if(na!=0)goto L109;
618
619 dradfg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
620 na=1;
621 goto L110;
622
623 L109:
624 dradfg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
625 na=0;
626
627 L110:
628 l2=l1;
629 }
630
631 if(na==1)return;
632
633 for(i=0;i<n;i++)c[i]=ch[i];
634 }
635
dradb2(int ido,int l1,float * cc,float * ch,float * wa1)636 static void dradb2(int ido,int l1,float *cc,float *ch,float *wa1){
637 int i,k,t0,t1,t2,t3,t4,t5,t6;
638 float ti2,tr2;
639
640 t0=l1*ido;
641
642 t1=0;
643 t2=0;
644 t3=(ido<<1)-1;
645 for(k=0;k<l1;k++){
646 ch[t1]=cc[t2]+cc[t3+t2];
647 ch[t1+t0]=cc[t2]-cc[t3+t2];
648 t2=(t1+=ido)<<1;
649 }
650
651 if(ido<2)return;
652 if(ido==2)goto L105;
653
654 t1=0;
655 t2=0;
656 for(k=0;k<l1;k++){
657 t3=t1;
658 t5=(t4=t2)+(ido<<1);
659 t6=t0+t1;
660 for(i=2;i<ido;i+=2){
661 t3+=2;
662 t4+=2;
663 t5-=2;
664 t6+=2;
665 ch[t3-1]=cc[t4-1]+cc[t5-1];
666 tr2=cc[t4-1]-cc[t5-1];
667 ch[t3]=cc[t4]-cc[t5];
668 ti2=cc[t4]+cc[t5];
669 ch[t6-1]=wa1[i-2]*tr2-wa1[i-1]*ti2;
670 ch[t6]=wa1[i-2]*ti2+wa1[i-1]*tr2;
671 }
672 t2=(t1+=ido)<<1;
673 }
674
675 if(ido%2==1)return;
676
677 L105:
678 t1=ido-1;
679 t2=ido-1;
680 for(k=0;k<l1;k++){
681 ch[t1]=cc[t2]+cc[t2];
682 ch[t1+t0]=-(cc[t2+1]+cc[t2+1]);
683 t1+=ido;
684 t2+=ido<<1;
685 }
686 }
687
dradb3(int ido,int l1,float * cc,float * ch,float * wa1,float * wa2)688 static void dradb3(int ido,int l1,float *cc,float *ch,float *wa1,
689 float *wa2){
690 static float taur = -.5f;
691 static float taui = .8660254037844386f;
692 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
693 float ci2,ci3,di2,di3,cr2,cr3,dr2,dr3,ti2,tr2;
694 t0=l1*ido;
695
696 t1=0;
697 t2=t0<<1;
698 t3=ido<<1;
699 t4=ido+(ido<<1);
700 t5=0;
701 for(k=0;k<l1;k++){
702 tr2=cc[t3-1]+cc[t3-1];
703 cr2=cc[t5]+(taur*tr2);
704 ch[t1]=cc[t5]+tr2;
705 ci3=taui*(cc[t3]+cc[t3]);
706 ch[t1+t0]=cr2-ci3;
707 ch[t1+t2]=cr2+ci3;
708 t1+=ido;
709 t3+=t4;
710 t5+=t4;
711 }
712
713 if(ido==1)return;
714
715 t1=0;
716 t3=ido<<1;
717 for(k=0;k<l1;k++){
718 t7=t1+(t1<<1);
719 t6=(t5=t7+t3);
720 t8=t1;
721 t10=(t9=t1+t0)+t0;
722
723 for(i=2;i<ido;i+=2){
724 t5+=2;
725 t6-=2;
726 t7+=2;
727 t8+=2;
728 t9+=2;
729 t10+=2;
730 tr2=cc[t5-1]+cc[t6-1];
731 cr2=cc[t7-1]+(taur*tr2);
732 ch[t8-1]=cc[t7-1]+tr2;
733 ti2=cc[t5]-cc[t6];
734 ci2=cc[t7]+(taur*ti2);
735 ch[t8]=cc[t7]+ti2;
736 cr3=taui*(cc[t5-1]-cc[t6-1]);
737 ci3=taui*(cc[t5]+cc[t6]);
738 dr2=cr2-ci3;
739 dr3=cr2+ci3;
740 di2=ci2+cr3;
741 di3=ci2-cr3;
742 ch[t9-1]=wa1[i-2]*dr2-wa1[i-1]*di2;
743 ch[t9]=wa1[i-2]*di2+wa1[i-1]*dr2;
744 ch[t10-1]=wa2[i-2]*dr3-wa2[i-1]*di3;
745 ch[t10]=wa2[i-2]*di3+wa2[i-1]*dr3;
746 }
747 t1+=ido;
748 }
749 }
750
dradb4(int ido,int l1,float * cc,float * ch,float * wa1,float * wa2,float * wa3)751 static void dradb4(int ido,int l1,float *cc,float *ch,float *wa1,
752 float *wa2,float *wa3){
753 static float sqrt2=1.414213562373095f;
754 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8;
755 float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
756 t0=l1*ido;
757
758 t1=0;
759 t2=ido<<2;
760 t3=0;
761 t6=ido<<1;
762 for(k=0;k<l1;k++){
763 t4=t3+t6;
764 t5=t1;
765 tr3=cc[t4-1]+cc[t4-1];
766 tr4=cc[t4]+cc[t4];
767 tr1=cc[t3]-cc[(t4+=t6)-1];
768 tr2=cc[t3]+cc[t4-1];
769 ch[t5]=tr2+tr3;
770 ch[t5+=t0]=tr1-tr4;
771 ch[t5+=t0]=tr2-tr3;
772 ch[t5+=t0]=tr1+tr4;
773 t1+=ido;
774 t3+=t2;
775 }
776
777 if(ido<2)return;
778 if(ido==2)goto L105;
779
780 t1=0;
781 for(k=0;k<l1;k++){
782 t5=(t4=(t3=(t2=t1<<2)+t6))+t6;
783 t7=t1;
784 for(i=2;i<ido;i+=2){
785 t2+=2;
786 t3+=2;
787 t4-=2;
788 t5-=2;
789 t7+=2;
790 ti1=cc[t2]+cc[t5];
791 ti2=cc[t2]-cc[t5];
792 ti3=cc[t3]-cc[t4];
793 tr4=cc[t3]+cc[t4];
794 tr1=cc[t2-1]-cc[t5-1];
795 tr2=cc[t2-1]+cc[t5-1];
796 ti4=cc[t3-1]-cc[t4-1];
797 tr3=cc[t3-1]+cc[t4-1];
798 ch[t7-1]=tr2+tr3;
799 cr3=tr2-tr3;
800 ch[t7]=ti2+ti3;
801 ci3=ti2-ti3;
802 cr2=tr1-tr4;
803 cr4=tr1+tr4;
804 ci2=ti1+ti4;
805 ci4=ti1-ti4;
806
807 ch[(t8=t7+t0)-1]=wa1[i-2]*cr2-wa1[i-1]*ci2;
808 ch[t8]=wa1[i-2]*ci2+wa1[i-1]*cr2;
809 ch[(t8+=t0)-1]=wa2[i-2]*cr3-wa2[i-1]*ci3;
810 ch[t8]=wa2[i-2]*ci3+wa2[i-1]*cr3;
811 ch[(t8+=t0)-1]=wa3[i-2]*cr4-wa3[i-1]*ci4;
812 ch[t8]=wa3[i-2]*ci4+wa3[i-1]*cr4;
813 }
814 t1+=ido;
815 }
816
817 if(ido%2 == 1)return;
818
819 L105:
820
821 t1=ido;
822 t2=ido<<2;
823 t3=ido-1;
824 t4=ido+(ido<<1);
825 for(k=0;k<l1;k++){
826 t5=t3;
827 ti1=cc[t1]+cc[t4];
828 ti2=cc[t4]-cc[t1];
829 tr1=cc[t1-1]-cc[t4-1];
830 tr2=cc[t1-1]+cc[t4-1];
831 ch[t5]=tr2+tr2;
832 ch[t5+=t0]=sqrt2*(tr1-ti1);
833 ch[t5+=t0]=ti2+ti2;
834 ch[t5+=t0]=-sqrt2*(tr1+ti1);
835
836 t3+=ido;
837 t1+=t2;
838 t4+=t2;
839 }
840 }
841
dradbg(int ido,int ip,int l1,int idl1,float * cc,float * c1,float * c2,float * ch,float * ch2,float * wa)842 static void dradbg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
843 float *c2,float *ch,float *ch2,float *wa){
844 static float tpi=6.283185307179586f;
845 int idij,ipph,i,j,k,l,ik,is,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,
846 t11,t12;
847 float dc2,ai1,ai2,ar1,ar2,ds2;
848 int nbd;
849 float dcp,arg,dsp,ar1h,ar2h;
850 int ipp2;
851
852 t10=ip*ido;
853 t0=l1*ido;
854 arg=tpi/(float)ip;
855 dcp=cos(arg);
856 dsp=sin(arg);
857 nbd=(ido-1)>>1;
858 ipp2=ip;
859 ipph=(ip+1)>>1;
860 if(ido<l1)goto L103;
861
862 t1=0;
863 t2=0;
864 for(k=0;k<l1;k++){
865 t3=t1;
866 t4=t2;
867 for(i=0;i<ido;i++){
868 ch[t3]=cc[t4];
869 t3++;
870 t4++;
871 }
872 t1+=ido;
873 t2+=t10;
874 }
875 goto L106;
876
877 L103:
878 t1=0;
879 for(i=0;i<ido;i++){
880 t2=t1;
881 t3=t1;
882 for(k=0;k<l1;k++){
883 ch[t2]=cc[t3];
884 t2+=ido;
885 t3+=t10;
886 }
887 t1++;
888 }
889
890 L106:
891 t1=0;
892 t2=ipp2*t0;
893 t7=(t5=ido<<1);
894 for(j=1;j<ipph;j++){
895 t1+=t0;
896 t2-=t0;
897 t3=t1;
898 t4=t2;
899 t6=t5;
900 for(k=0;k<l1;k++){
901 ch[t3]=cc[t6-1]+cc[t6-1];
902 ch[t4]=cc[t6]+cc[t6];
903 t3+=ido;
904 t4+=ido;
905 t6+=t10;
906 }
907 t5+=t7;
908 }
909
910 if (ido == 1)goto L116;
911 if(nbd<l1)goto L112;
912
913 t1=0;
914 t2=ipp2*t0;
915 t7=0;
916 for(j=1;j<ipph;j++){
917 t1+=t0;
918 t2-=t0;
919 t3=t1;
920 t4=t2;
921
922 t7+=(ido<<1);
923 t8=t7;
924 for(k=0;k<l1;k++){
925 t5=t3;
926 t6=t4;
927 t9=t8;
928 t11=t8;
929 for(i=2;i<ido;i+=2){
930 t5+=2;
931 t6+=2;
932 t9+=2;
933 t11-=2;
934 ch[t5-1]=cc[t9-1]+cc[t11-1];
935 ch[t6-1]=cc[t9-1]-cc[t11-1];
936 ch[t5]=cc[t9]-cc[t11];
937 ch[t6]=cc[t9]+cc[t11];
938 }
939 t3+=ido;
940 t4+=ido;
941 t8+=t10;
942 }
943 }
944 goto L116;
945
946 L112:
947 t1=0;
948 t2=ipp2*t0;
949 t7=0;
950 for(j=1;j<ipph;j++){
951 t1+=t0;
952 t2-=t0;
953 t3=t1;
954 t4=t2;
955 t7+=(ido<<1);
956 t8=t7;
957 t9=t7;
958 for(i=2;i<ido;i+=2){
959 t3+=2;
960 t4+=2;
961 t8+=2;
962 t9-=2;
963 t5=t3;
964 t6=t4;
965 t11=t8;
966 t12=t9;
967 for(k=0;k<l1;k++){
968 ch[t5-1]=cc[t11-1]+cc[t12-1];
969 ch[t6-1]=cc[t11-1]-cc[t12-1];
970 ch[t5]=cc[t11]-cc[t12];
971 ch[t6]=cc[t11]+cc[t12];
972 t5+=ido;
973 t6+=ido;
974 t11+=t10;
975 t12+=t10;
976 }
977 }
978 }
979
980 L116:
981 ar1=1.f;
982 ai1=0.f;
983 t1=0;
984 t9=(t2=ipp2*idl1);
985 t3=(ip-1)*idl1;
986 for(l=1;l<ipph;l++){
987 t1+=idl1;
988 t2-=idl1;
989
990 ar1h=dcp*ar1-dsp*ai1;
991 ai1=dcp*ai1+dsp*ar1;
992 ar1=ar1h;
993 t4=t1;
994 t5=t2;
995 t6=0;
996 t7=idl1;
997 t8=t3;
998 for(ik=0;ik<idl1;ik++){
999 c2[t4++]=ch2[t6++]+ar1*ch2[t7++];
1000 c2[t5++]=ai1*ch2[t8++];
1001 }
1002 dc2=ar1;
1003 ds2=ai1;
1004 ar2=ar1;
1005 ai2=ai1;
1006
1007 t6=idl1;
1008 t7=t9-idl1;
1009 for(j=2;j<ipph;j++){
1010 t6+=idl1;
1011 t7-=idl1;
1012 ar2h=dc2*ar2-ds2*ai2;
1013 ai2=dc2*ai2+ds2*ar2;
1014 ar2=ar2h;
1015 t4=t1;
1016 t5=t2;
1017 t11=t6;
1018 t12=t7;
1019 for(ik=0;ik<idl1;ik++){
1020 c2[t4++]+=ar2*ch2[t11++];
1021 c2[t5++]+=ai2*ch2[t12++];
1022 }
1023 }
1024 }
1025
1026 t1=0;
1027 for(j=1;j<ipph;j++){
1028 t1+=idl1;
1029 t2=t1;
1030 for(ik=0;ik<idl1;ik++)ch2[ik]+=ch2[t2++];
1031 }
1032
1033 t1=0;
1034 t2=ipp2*t0;
1035 for(j=1;j<ipph;j++){
1036 t1+=t0;
1037 t2-=t0;
1038 t3=t1;
1039 t4=t2;
1040 for(k=0;k<l1;k++){
1041 ch[t3]=c1[t3]-c1[t4];
1042 ch[t4]=c1[t3]+c1[t4];
1043 t3+=ido;
1044 t4+=ido;
1045 }
1046 }
1047
1048 if(ido==1)goto L132;
1049 if(nbd<l1)goto L128;
1050
1051 t1=0;
1052 t2=ipp2*t0;
1053 for(j=1;j<ipph;j++){
1054 t1+=t0;
1055 t2-=t0;
1056 t3=t1;
1057 t4=t2;
1058 for(k=0;k<l1;k++){
1059 t5=t3;
1060 t6=t4;
1061 for(i=2;i<ido;i+=2){
1062 t5+=2;
1063 t6+=2;
1064 ch[t5-1]=c1[t5-1]-c1[t6];
1065 ch[t6-1]=c1[t5-1]+c1[t6];
1066 ch[t5]=c1[t5]+c1[t6-1];
1067 ch[t6]=c1[t5]-c1[t6-1];
1068 }
1069 t3+=ido;
1070 t4+=ido;
1071 }
1072 }
1073 goto L132;
1074
1075 L128:
1076 t1=0;
1077 t2=ipp2*t0;
1078 for(j=1;j<ipph;j++){
1079 t1+=t0;
1080 t2-=t0;
1081 t3=t1;
1082 t4=t2;
1083 for(i=2;i<ido;i+=2){
1084 t3+=2;
1085 t4+=2;
1086 t5=t3;
1087 t6=t4;
1088 for(k=0;k<l1;k++){
1089 ch[t5-1]=c1[t5-1]-c1[t6];
1090 ch[t6-1]=c1[t5-1]+c1[t6];
1091 ch[t5]=c1[t5]+c1[t6-1];
1092 ch[t6]=c1[t5]-c1[t6-1];
1093 t5+=ido;
1094 t6+=ido;
1095 }
1096 }
1097 }
1098
1099 L132:
1100 if(ido==1)return;
1101
1102 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
1103
1104 t1=0;
1105 for(j=1;j<ip;j++){
1106 t2=(t1+=t0);
1107 for(k=0;k<l1;k++){
1108 c1[t2]=ch[t2];
1109 t2+=ido;
1110 }
1111 }
1112
1113 if(nbd>l1)goto L139;
1114
1115 is= -ido-1;
1116 t1=0;
1117 for(j=1;j<ip;j++){
1118 is+=ido;
1119 t1+=t0;
1120 idij=is;
1121 t2=t1;
1122 for(i=2;i<ido;i+=2){
1123 t2+=2;
1124 idij+=2;
1125 t3=t2;
1126 for(k=0;k<l1;k++){
1127 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1128 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1129 t3+=ido;
1130 }
1131 }
1132 }
1133 return;
1134
1135 L139:
1136 is= -ido-1;
1137 t1=0;
1138 for(j=1;j<ip;j++){
1139 is+=ido;
1140 t1+=t0;
1141 t2=t1;
1142 for(k=0;k<l1;k++){
1143 idij=is;
1144 t3=t2;
1145 for(i=2;i<ido;i+=2){
1146 idij+=2;
1147 t3+=2;
1148 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1149 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1150 }
1151 t2+=ido;
1152 }
1153 }
1154 }
1155
drftb1(int n,float * c,float * ch,float * wa,int * ifac)1156 static void drftb1(int n, float *c, float *ch, float *wa, int *ifac){
1157 int i,k1,l1,l2;
1158 int na;
1159 int nf,ip,iw,ix2,ix3,ido,idl1;
1160
1161 nf=ifac[1];
1162 na=0;
1163 l1=1;
1164 iw=1;
1165
1166 for(k1=0;k1<nf;k1++){
1167 ip=ifac[k1 + 2];
1168 l2=ip*l1;
1169 ido=n/l2;
1170 idl1=ido*l1;
1171 if(ip!=4)goto L103;
1172 ix2=iw+ido;
1173 ix3=ix2+ido;
1174
1175 if(na!=0)
1176 dradb4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
1177 else
1178 dradb4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
1179 na=1-na;
1180 goto L115;
1181
1182 L103:
1183 if(ip!=2)goto L106;
1184
1185 if(na!=0)
1186 dradb2(ido,l1,ch,c,wa+iw-1);
1187 else
1188 dradb2(ido,l1,c,ch,wa+iw-1);
1189 na=1-na;
1190 goto L115;
1191
1192 L106:
1193 if(ip!=3)goto L109;
1194
1195 ix2=iw+ido;
1196 if(na!=0)
1197 dradb3(ido,l1,ch,c,wa+iw-1,wa+ix2-1);
1198 else
1199 dradb3(ido,l1,c,ch,wa+iw-1,wa+ix2-1);
1200 na=1-na;
1201 goto L115;
1202
1203 L109:
1204 /* The radix five case can be translated later..... */
1205 /* if(ip!=5)goto L112;
1206
1207 ix2=iw+ido;
1208 ix3=ix2+ido;
1209 ix4=ix3+ido;
1210 if(na!=0)
1211 dradb5(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1212 else
1213 dradb5(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1214 na=1-na;
1215 goto L115;
1216
1217 L112:*/
1218 if(na!=0)
1219 dradbg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
1220 else
1221 dradbg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
1222 if(ido==1)na=1-na;
1223
1224 L115:
1225 l1=l2;
1226 iw+=(ip-1)*ido;
1227 }
1228
1229 if(na==0)return;
1230
1231 for(i=0;i<n;i++)c[i]=ch[i];
1232 }
1233
spx_drft_forward(struct drft_lookup * l,float * data)1234 void spx_drft_forward(struct drft_lookup *l,float *data){
1235 if(l->n==1)return;
1236 drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1237 }
1238
spx_drft_backward(struct drft_lookup * l,float * data)1239 void spx_drft_backward(struct drft_lookup *l,float *data){
1240 if (l->n==1)return;
1241 drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1242 }
1243
spx_drft_init(struct drft_lookup * l,int n)1244 void spx_drft_init(struct drft_lookup *l,int n)
1245 {
1246 l->n=n;
1247 l->trigcache=(float*)speex_alloc(3*n*sizeof(*l->trigcache));
1248 l->splitcache=(int*)speex_alloc(32*sizeof(*l->splitcache));
1249 fdrffti(n, l->trigcache, l->splitcache);
1250 }
1251
spx_drft_clear(struct drft_lookup * l)1252 void spx_drft_clear(struct drft_lookup *l)
1253 {
1254 if(l)
1255 {
1256 if(l->trigcache)
1257 speex_free(l->trigcache);
1258 if(l->splitcache)
1259 speex_free(l->splitcache);
1260 }
1261 }
1262