1 /*
2 * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
3 * Copyright Takuya OOURA, 1996-2001
4 *
5 * You may use, copy, modify and distribute this code for any purpose (include
6 * commercial use) and without fee. Please refer to this package when you modify
7 * this code.
8 *
9 * Changes:
10 * Trivial type modifications by the WebRTC authors.
11 */
12
13 /*
14 Fast Fourier/Cosine/Sine Transform
15 dimension :one
16 data length :power of 2
17 decimation :frequency
18 radix :4, 2
19 data :inplace
20 table :use
21 functions
22 cdft: Complex Discrete Fourier Transform
23 rdft: Real Discrete Fourier Transform
24 ddct: Discrete Cosine Transform
25 ddst: Discrete Sine Transform
26 dfct: Cosine Transform of RDFT (Real Symmetric DFT)
27 dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
28 function prototypes
29 void cdft(int, int, float *, int *, float *);
30 void rdft(size_t, int, float *, size_t *, float *);
31 void ddct(int, int, float *, int *, float *);
32 void ddst(int, int, float *, int *, float *);
33 void dfct(int, float *, float *, int *, float *);
34 void dfst(int, float *, float *, int *, float *);
35
36
37 -------- Complex DFT (Discrete Fourier Transform) --------
38 [definition]
39 <case1>
40 X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
41 <case2>
42 X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
43 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
44 [usage]
45 <case1>
46 ip[0] = 0; // first time only
47 cdft(2*n, 1, a, ip, w);
48 <case2>
49 ip[0] = 0; // first time only
50 cdft(2*n, -1, a, ip, w);
51 [parameters]
52 2*n :data length (int)
53 n >= 1, n = power of 2
54 a[0...2*n-1] :input/output data (float *)
55 input data
56 a[2*j] = Re(x[j]),
57 a[2*j+1] = Im(x[j]), 0<=j<n
58 output data
59 a[2*k] = Re(X[k]),
60 a[2*k+1] = Im(X[k]), 0<=k<n
61 ip[0...*] :work area for bit reversal (int *)
62 length of ip >= 2+sqrt(n)
63 strictly,
64 length of ip >=
65 2+(1<<(int)(log(n+0.5)/log(2))/2).
66 ip[0],ip[1] are pointers of the cos/sin table.
67 w[0...n/2-1] :cos/sin table (float *)
68 w[],ip[] are initialized if ip[0] == 0.
69 [remark]
70 Inverse of
71 cdft(2*n, -1, a, ip, w);
72 is
73 cdft(2*n, 1, a, ip, w);
74 for (j = 0; j <= 2 * n - 1; j++) {
75 a[j] *= 1.0 / n;
76 }
77 .
78
79
80 -------- Real DFT / Inverse of Real DFT --------
81 [definition]
82 <case1> RDFT
83 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
84 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
85 <case2> IRDFT (excluding scale)
86 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
87 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
88 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
89 [usage]
90 <case1>
91 ip[0] = 0; // first time only
92 rdft(n, 1, a, ip, w);
93 <case2>
94 ip[0] = 0; // first time only
95 rdft(n, -1, a, ip, w);
96 [parameters]
97 n :data length (size_t)
98 n >= 2, n = power of 2
99 a[0...n-1] :input/output data (float *)
100 <case1>
101 output data
102 a[2*k] = R[k], 0<=k<n/2
103 a[2*k+1] = I[k], 0<k<n/2
104 a[1] = R[n/2]
105 <case2>
106 input data
107 a[2*j] = R[j], 0<=j<n/2
108 a[2*j+1] = I[j], 0<j<n/2
109 a[1] = R[n/2]
110 ip[0...*] :work area for bit reversal (size_t *)
111 length of ip >= 2+sqrt(n/2)
112 strictly,
113 length of ip >=
114 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
115 ip[0],ip[1] are pointers of the cos/sin table.
116 w[0...n/2-1] :cos/sin table (float *)
117 w[],ip[] are initialized if ip[0] == 0.
118 [remark]
119 Inverse of
120 rdft(n, 1, a, ip, w);
121 is
122 rdft(n, -1, a, ip, w);
123 for (j = 0; j <= n - 1; j++) {
124 a[j] *= 2.0 / n;
125 }
126 .
127
128
129 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
130 [definition]
131 <case1> IDCT (excluding scale)
132 C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
133 <case2> DCT
134 C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
135 [usage]
136 <case1>
137 ip[0] = 0; // first time only
138 ddct(n, 1, a, ip, w);
139 <case2>
140 ip[0] = 0; // first time only
141 ddct(n, -1, a, ip, w);
142 [parameters]
143 n :data length (int)
144 n >= 2, n = power of 2
145 a[0...n-1] :input/output data (float *)
146 output data
147 a[k] = C[k], 0<=k<n
148 ip[0...*] :work area for bit reversal (int *)
149 length of ip >= 2+sqrt(n/2)
150 strictly,
151 length of ip >=
152 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
153 ip[0],ip[1] are pointers of the cos/sin table.
154 w[0...n*5/4-1] :cos/sin table (float *)
155 w[],ip[] are initialized if ip[0] == 0.
156 [remark]
157 Inverse of
158 ddct(n, -1, a, ip, w);
159 is
160 a[0] *= 0.5;
161 ddct(n, 1, a, ip, w);
162 for (j = 0; j <= n - 1; j++) {
163 a[j] *= 2.0 / n;
164 }
165 .
166
167
168 -------- DST (Discrete Sine Transform) / Inverse of DST --------
169 [definition]
170 <case1> IDST (excluding scale)
171 S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
172 <case2> DST
173 S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
174 [usage]
175 <case1>
176 ip[0] = 0; // first time only
177 ddst(n, 1, a, ip, w);
178 <case2>
179 ip[0] = 0; // first time only
180 ddst(n, -1, a, ip, w);
181 [parameters]
182 n :data length (int)
183 n >= 2, n = power of 2
184 a[0...n-1] :input/output data (float *)
185 <case1>
186 input data
187 a[j] = A[j], 0<j<n
188 a[0] = A[n]
189 output data
190 a[k] = S[k], 0<=k<n
191 <case2>
192 output data
193 a[k] = S[k], 0<k<n
194 a[0] = S[n]
195 ip[0...*] :work area for bit reversal (int *)
196 length of ip >= 2+sqrt(n/2)
197 strictly,
198 length of ip >=
199 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
200 ip[0],ip[1] are pointers of the cos/sin table.
201 w[0...n*5/4-1] :cos/sin table (float *)
202 w[],ip[] are initialized if ip[0] == 0.
203 [remark]
204 Inverse of
205 ddst(n, -1, a, ip, w);
206 is
207 a[0] *= 0.5;
208 ddst(n, 1, a, ip, w);
209 for (j = 0; j <= n - 1; j++) {
210 a[j] *= 2.0 / n;
211 }
212 .
213
214
215 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
216 [definition]
217 C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
218 [usage]
219 ip[0] = 0; // first time only
220 dfct(n, a, t, ip, w);
221 [parameters]
222 n :data length - 1 (int)
223 n >= 2, n = power of 2
224 a[0...n] :input/output data (float *)
225 output data
226 a[k] = C[k], 0<=k<=n
227 t[0...n/2] :work area (float *)
228 ip[0...*] :work area for bit reversal (int *)
229 length of ip >= 2+sqrt(n/4)
230 strictly,
231 length of ip >=
232 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
233 ip[0],ip[1] are pointers of the cos/sin table.
234 w[0...n*5/8-1] :cos/sin table (float *)
235 w[],ip[] are initialized if ip[0] == 0.
236 [remark]
237 Inverse of
238 a[0] *= 0.5;
239 a[n] *= 0.5;
240 dfct(n, a, t, ip, w);
241 is
242 a[0] *= 0.5;
243 a[n] *= 0.5;
244 dfct(n, a, t, ip, w);
245 for (j = 0; j <= n; j++) {
246 a[j] *= 2.0 / n;
247 }
248 .
249
250
251 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
252 [definition]
253 S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
254 [usage]
255 ip[0] = 0; // first time only
256 dfst(n, a, t, ip, w);
257 [parameters]
258 n :data length + 1 (int)
259 n >= 2, n = power of 2
260 a[0...n-1] :input/output data (float *)
261 output data
262 a[k] = S[k], 0<k<n
263 (a[0] is used for work area)
264 t[0...n/2-1] :work area (float *)
265 ip[0...*] :work area for bit reversal (int *)
266 length of ip >= 2+sqrt(n/4)
267 strictly,
268 length of ip >=
269 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
270 ip[0],ip[1] are pointers of the cos/sin table.
271 w[0...n*5/8-1] :cos/sin table (float *)
272 w[],ip[] are initialized if ip[0] == 0.
273 [remark]
274 Inverse of
275 dfst(n, a, t, ip, w);
276 is
277 dfst(n, a, t, ip, w);
278 for (j = 1; j <= n - 1; j++) {
279 a[j] *= 2.0 / n;
280 }
281 .
282
283
284 Appendix :
285 The cos/sin table is recalculated when the larger table required.
286 w[] and ip[] are compatible with all routines.
287 */
288
289 #include <stddef.h>
290
291 static void makewt(size_t nw, size_t *ip, float *w);
292 static void makect(size_t nc, size_t *ip, float *c);
293 static void bitrv2(size_t n, size_t *ip, float *a);
294 #if 0 // Not used.
295 static void bitrv2conj(int n, int *ip, float *a);
296 #endif
297 static void cftfsub(size_t n, float *a, float *w);
298 static void cftbsub(size_t n, float *a, float *w);
299 static void cft1st(size_t n, float *a, float *w);
300 static void cftmdl(size_t n, size_t l, float *a, float *w);
301 static void rftfsub(size_t n, float *a, size_t nc, float *c);
302 static void rftbsub(size_t n, float *a, size_t nc, float *c);
303 #if 0 // Not used.
304 static void dctsub(int n, float *a, int nc, float *c)
305 static void dstsub(int n, float *a, int nc, float *c)
306 #endif
307
308
309 #if 0 // Not used.
310 void WebRtc_cdft(int n, int isgn, float *a, int *ip, float *w)
311 {
312 if (n > (ip[0] << 2)) {
313 makewt(n >> 2, ip, w);
314 }
315 if (n > 4) {
316 if (isgn >= 0) {
317 bitrv2(n, ip + 2, a);
318 cftfsub(n, a, w);
319 } else {
320 bitrv2conj(n, ip + 2, a);
321 cftbsub(n, a, w);
322 }
323 } else if (n == 4) {
324 cftfsub(n, a, w);
325 }
326 }
327 #endif
328
329
WebRtc_rdft(size_t n,int isgn,float * a,size_t * ip,float * w)330 void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w)
331 {
332 size_t nw, nc;
333 float xi;
334
335 nw = ip[0];
336 if (n > (nw << 2)) {
337 nw = n >> 2;
338 makewt(nw, ip, w);
339 }
340 nc = ip[1];
341 if (n > (nc << 2)) {
342 nc = n >> 2;
343 makect(nc, ip, w + nw);
344 }
345 if (isgn >= 0) {
346 if (n > 4) {
347 bitrv2(n, ip + 2, a);
348 cftfsub(n, a, w);
349 rftfsub(n, a, nc, w + nw);
350 } else if (n == 4) {
351 cftfsub(n, a, w);
352 }
353 xi = a[0] - a[1];
354 a[0] += a[1];
355 a[1] = xi;
356 } else {
357 a[1] = 0.5f * (a[0] - a[1]);
358 a[0] -= a[1];
359 if (n > 4) {
360 rftbsub(n, a, nc, w + nw);
361 bitrv2(n, ip + 2, a);
362 cftbsub(n, a, w);
363 } else if (n == 4) {
364 cftfsub(n, a, w);
365 }
366 }
367 }
368
369 #if 0 // Not used.
370 static void ddct(int n, int isgn, float *a, int *ip, float *w)
371 {
372 int j, nw, nc;
373 float xr;
374
375 nw = ip[0];
376 if (n > (nw << 2)) {
377 nw = n >> 2;
378 makewt(nw, ip, w);
379 }
380 nc = ip[1];
381 if (n > nc) {
382 nc = n;
383 makect(nc, ip, w + nw);
384 }
385 if (isgn < 0) {
386 xr = a[n - 1];
387 for (j = n - 2; j >= 2; j -= 2) {
388 a[j + 1] = a[j] - a[j - 1];
389 a[j] += a[j - 1];
390 }
391 a[1] = a[0] - xr;
392 a[0] += xr;
393 if (n > 4) {
394 rftbsub(n, a, nc, w + nw);
395 bitrv2(n, ip + 2, a);
396 cftbsub(n, a, w);
397 } else if (n == 4) {
398 cftfsub(n, a, w);
399 }
400 }
401 dctsub(n, a, nc, w + nw);
402 if (isgn >= 0) {
403 if (n > 4) {
404 bitrv2(n, ip + 2, a);
405 cftfsub(n, a, w);
406 rftfsub(n, a, nc, w + nw);
407 } else if (n == 4) {
408 cftfsub(n, a, w);
409 }
410 xr = a[0] - a[1];
411 a[0] += a[1];
412 for (j = 2; j < n; j += 2) {
413 a[j - 1] = a[j] - a[j + 1];
414 a[j] += a[j + 1];
415 }
416 a[n - 1] = xr;
417 }
418 }
419
420
421 static void ddst(int n, int isgn, float *a, int *ip, float *w)
422 {
423 int j, nw, nc;
424 float xr;
425
426 nw = ip[0];
427 if (n > (nw << 2)) {
428 nw = n >> 2;
429 makewt(nw, ip, w);
430 }
431 nc = ip[1];
432 if (n > nc) {
433 nc = n;
434 makect(nc, ip, w + nw);
435 }
436 if (isgn < 0) {
437 xr = a[n - 1];
438 for (j = n - 2; j >= 2; j -= 2) {
439 a[j + 1] = -a[j] - a[j - 1];
440 a[j] -= a[j - 1];
441 }
442 a[1] = a[0] + xr;
443 a[0] -= xr;
444 if (n > 4) {
445 rftbsub(n, a, nc, w + nw);
446 bitrv2(n, ip + 2, a);
447 cftbsub(n, a, w);
448 } else if (n == 4) {
449 cftfsub(n, a, w);
450 }
451 }
452 dstsub(n, a, nc, w + nw);
453 if (isgn >= 0) {
454 if (n > 4) {
455 bitrv2(n, ip + 2, a);
456 cftfsub(n, a, w);
457 rftfsub(n, a, nc, w + nw);
458 } else if (n == 4) {
459 cftfsub(n, a, w);
460 }
461 xr = a[0] - a[1];
462 a[0] += a[1];
463 for (j = 2; j < n; j += 2) {
464 a[j - 1] = -a[j] - a[j + 1];
465 a[j] -= a[j + 1];
466 }
467 a[n - 1] = -xr;
468 }
469 }
470
471
472 static void dfct(int n, float *a, float *t, int *ip, float *w)
473 {
474 int j, k, l, m, mh, nw, nc;
475 float xr, xi, yr, yi;
476
477 nw = ip[0];
478 if (n > (nw << 3)) {
479 nw = n >> 3;
480 makewt(nw, ip, w);
481 }
482 nc = ip[1];
483 if (n > (nc << 1)) {
484 nc = n >> 1;
485 makect(nc, ip, w + nw);
486 }
487 m = n >> 1;
488 yi = a[m];
489 xi = a[0] + a[n];
490 a[0] -= a[n];
491 t[0] = xi - yi;
492 t[m] = xi + yi;
493 if (n > 2) {
494 mh = m >> 1;
495 for (j = 1; j < mh; j++) {
496 k = m - j;
497 xr = a[j] - a[n - j];
498 xi = a[j] + a[n - j];
499 yr = a[k] - a[n - k];
500 yi = a[k] + a[n - k];
501 a[j] = xr;
502 a[k] = yr;
503 t[j] = xi - yi;
504 t[k] = xi + yi;
505 }
506 t[mh] = a[mh] + a[n - mh];
507 a[mh] -= a[n - mh];
508 dctsub(m, a, nc, w + nw);
509 if (m > 4) {
510 bitrv2(m, ip + 2, a);
511 cftfsub(m, a, w);
512 rftfsub(m, a, nc, w + nw);
513 } else if (m == 4) {
514 cftfsub(m, a, w);
515 }
516 a[n - 1] = a[0] - a[1];
517 a[1] = a[0] + a[1];
518 for (j = m - 2; j >= 2; j -= 2) {
519 a[2 * j + 1] = a[j] + a[j + 1];
520 a[2 * j - 1] = a[j] - a[j + 1];
521 }
522 l = 2;
523 m = mh;
524 while (m >= 2) {
525 dctsub(m, t, nc, w + nw);
526 if (m > 4) {
527 bitrv2(m, ip + 2, t);
528 cftfsub(m, t, w);
529 rftfsub(m, t, nc, w + nw);
530 } else if (m == 4) {
531 cftfsub(m, t, w);
532 }
533 a[n - l] = t[0] - t[1];
534 a[l] = t[0] + t[1];
535 k = 0;
536 for (j = 2; j < m; j += 2) {
537 k += l << 2;
538 a[k - l] = t[j] - t[j + 1];
539 a[k + l] = t[j] + t[j + 1];
540 }
541 l <<= 1;
542 mh = m >> 1;
543 for (j = 0; j < mh; j++) {
544 k = m - j;
545 t[j] = t[m + k] - t[m + j];
546 t[k] = t[m + k] + t[m + j];
547 }
548 t[mh] = t[m + mh];
549 m = mh;
550 }
551 a[l] = t[0];
552 a[n] = t[2] - t[1];
553 a[0] = t[2] + t[1];
554 } else {
555 a[1] = a[0];
556 a[2] = t[0];
557 a[0] = t[1];
558 }
559 }
560
561 static void dfst(int n, float *a, float *t, int *ip, float *w)
562 {
563 int j, k, l, m, mh, nw, nc;
564 float xr, xi, yr, yi;
565
566 nw = ip[0];
567 if (n > (nw << 3)) {
568 nw = n >> 3;
569 makewt(nw, ip, w);
570 }
571 nc = ip[1];
572 if (n > (nc << 1)) {
573 nc = n >> 1;
574 makect(nc, ip, w + nw);
575 }
576 if (n > 2) {
577 m = n >> 1;
578 mh = m >> 1;
579 for (j = 1; j < mh; j++) {
580 k = m - j;
581 xr = a[j] + a[n - j];
582 xi = a[j] - a[n - j];
583 yr = a[k] + a[n - k];
584 yi = a[k] - a[n - k];
585 a[j] = xr;
586 a[k] = yr;
587 t[j] = xi + yi;
588 t[k] = xi - yi;
589 }
590 t[0] = a[mh] - a[n - mh];
591 a[mh] += a[n - mh];
592 a[0] = a[m];
593 dstsub(m, a, nc, w + nw);
594 if (m > 4) {
595 bitrv2(m, ip + 2, a);
596 cftfsub(m, a, w);
597 rftfsub(m, a, nc, w + nw);
598 } else if (m == 4) {
599 cftfsub(m, a, w);
600 }
601 a[n - 1] = a[1] - a[0];
602 a[1] = a[0] + a[1];
603 for (j = m - 2; j >= 2; j -= 2) {
604 a[2 * j + 1] = a[j] - a[j + 1];
605 a[2 * j - 1] = -a[j] - a[j + 1];
606 }
607 l = 2;
608 m = mh;
609 while (m >= 2) {
610 dstsub(m, t, nc, w + nw);
611 if (m > 4) {
612 bitrv2(m, ip + 2, t);
613 cftfsub(m, t, w);
614 rftfsub(m, t, nc, w + nw);
615 } else if (m == 4) {
616 cftfsub(m, t, w);
617 }
618 a[n - l] = t[1] - t[0];
619 a[l] = t[0] + t[1];
620 k = 0;
621 for (j = 2; j < m; j += 2) {
622 k += l << 2;
623 a[k - l] = -t[j] - t[j + 1];
624 a[k + l] = t[j] - t[j + 1];
625 }
626 l <<= 1;
627 mh = m >> 1;
628 for (j = 1; j < mh; j++) {
629 k = m - j;
630 t[j] = t[m + k] + t[m + j];
631 t[k] = t[m + k] - t[m + j];
632 }
633 t[0] = t[m + mh];
634 m = mh;
635 }
636 a[l] = t[0];
637 }
638 a[0] = 0;
639 }
640 #endif // Not used.
641
642
643 /* -------- initializing routines -------- */
644
645
646 #include <math.h>
647
makewt(size_t nw,size_t * ip,float * w)648 static void makewt(size_t nw, size_t *ip, float *w)
649 {
650 size_t j, nwh;
651 float delta, x, y;
652
653 ip[0] = nw;
654 ip[1] = 1;
655 if (nw > 2) {
656 nwh = nw >> 1;
657 delta = atanf(1.0f) / nwh;
658 w[0] = 1;
659 w[1] = 0;
660 w[nwh] = (float)cos(delta * nwh);
661 w[nwh + 1] = w[nwh];
662 if (nwh > 2) {
663 for (j = 2; j < nwh; j += 2) {
664 x = (float)cos(delta * j);
665 y = (float)sin(delta * j);
666 w[j] = x;
667 w[j + 1] = y;
668 w[nw - j] = y;
669 w[nw - j + 1] = x;
670 }
671 bitrv2(nw, ip + 2, w);
672 }
673 }
674 }
675
676
makect(size_t nc,size_t * ip,float * c)677 static void makect(size_t nc, size_t *ip, float *c)
678 {
679 size_t j, nch;
680 float delta;
681
682 ip[1] = nc;
683 if (nc > 1) {
684 nch = nc >> 1;
685 delta = atanf(1.0f) / nch;
686 c[0] = (float)cos(delta * nch);
687 c[nch] = 0.5f * c[0];
688 for (j = 1; j < nch; j++) {
689 c[j] = 0.5f * (float)cos(delta * j);
690 c[nc - j] = 0.5f * (float)sin(delta * j);
691 }
692 }
693 }
694
695
696 /* -------- child routines -------- */
697
698
bitrv2(size_t n,size_t * ip,float * a)699 static void bitrv2(size_t n, size_t *ip, float *a)
700 {
701 size_t j, j1, k, k1, l, m, m2;
702 float xr, xi, yr, yi;
703
704 ip[0] = 0;
705 l = n;
706 m = 1;
707 while ((m << 3) < l) {
708 l >>= 1;
709 for (j = 0; j < m; j++) {
710 ip[m + j] = ip[j] + l;
711 }
712 m <<= 1;
713 }
714 m2 = 2 * m;
715 if ((m << 3) == l) {
716 for (k = 0; k < m; k++) {
717 for (j = 0; j < k; j++) {
718 j1 = 2 * j + ip[k];
719 k1 = 2 * k + ip[j];
720 xr = a[j1];
721 xi = a[j1 + 1];
722 yr = a[k1];
723 yi = a[k1 + 1];
724 a[j1] = yr;
725 a[j1 + 1] = yi;
726 a[k1] = xr;
727 a[k1 + 1] = xi;
728 j1 += m2;
729 k1 += 2 * m2;
730 xr = a[j1];
731 xi = a[j1 + 1];
732 yr = a[k1];
733 yi = a[k1 + 1];
734 a[j1] = yr;
735 a[j1 + 1] = yi;
736 a[k1] = xr;
737 a[k1 + 1] = xi;
738 j1 += m2;
739 k1 -= m2;
740 xr = a[j1];
741 xi = a[j1 + 1];
742 yr = a[k1];
743 yi = a[k1 + 1];
744 a[j1] = yr;
745 a[j1 + 1] = yi;
746 a[k1] = xr;
747 a[k1 + 1] = xi;
748 j1 += m2;
749 k1 += 2 * m2;
750 xr = a[j1];
751 xi = a[j1 + 1];
752 yr = a[k1];
753 yi = a[k1 + 1];
754 a[j1] = yr;
755 a[j1 + 1] = yi;
756 a[k1] = xr;
757 a[k1 + 1] = xi;
758 }
759 j1 = 2 * k + m2 + ip[k];
760 k1 = j1 + m2;
761 xr = a[j1];
762 xi = a[j1 + 1];
763 yr = a[k1];
764 yi = a[k1 + 1];
765 a[j1] = yr;
766 a[j1 + 1] = yi;
767 a[k1] = xr;
768 a[k1 + 1] = xi;
769 }
770 } else {
771 for (k = 1; k < m; k++) {
772 for (j = 0; j < k; j++) {
773 j1 = 2 * j + ip[k];
774 k1 = 2 * k + ip[j];
775 xr = a[j1];
776 xi = a[j1 + 1];
777 yr = a[k1];
778 yi = a[k1 + 1];
779 a[j1] = yr;
780 a[j1 + 1] = yi;
781 a[k1] = xr;
782 a[k1 + 1] = xi;
783 j1 += m2;
784 k1 += m2;
785 xr = a[j1];
786 xi = a[j1 + 1];
787 yr = a[k1];
788 yi = a[k1 + 1];
789 a[j1] = yr;
790 a[j1 + 1] = yi;
791 a[k1] = xr;
792 a[k1 + 1] = xi;
793 }
794 }
795 }
796 }
797
798 #if 0 // Not used.
799 static void bitrv2conj(int n, int *ip, float *a)
800 {
801 int j, j1, k, k1, l, m, m2;
802 float xr, xi, yr, yi;
803
804 ip[0] = 0;
805 l = n;
806 m = 1;
807 while ((m << 3) < l) {
808 l >>= 1;
809 for (j = 0; j < m; j++) {
810 ip[m + j] = ip[j] + l;
811 }
812 m <<= 1;
813 }
814 m2 = 2 * m;
815 if ((m << 3) == l) {
816 for (k = 0; k < m; k++) {
817 for (j = 0; j < k; j++) {
818 j1 = 2 * j + ip[k];
819 k1 = 2 * k + ip[j];
820 xr = a[j1];
821 xi = -a[j1 + 1];
822 yr = a[k1];
823 yi = -a[k1 + 1];
824 a[j1] = yr;
825 a[j1 + 1] = yi;
826 a[k1] = xr;
827 a[k1 + 1] = xi;
828 j1 += m2;
829 k1 += 2 * m2;
830 xr = a[j1];
831 xi = -a[j1 + 1];
832 yr = a[k1];
833 yi = -a[k1 + 1];
834 a[j1] = yr;
835 a[j1 + 1] = yi;
836 a[k1] = xr;
837 a[k1 + 1] = xi;
838 j1 += m2;
839 k1 -= m2;
840 xr = a[j1];
841 xi = -a[j1 + 1];
842 yr = a[k1];
843 yi = -a[k1 + 1];
844 a[j1] = yr;
845 a[j1 + 1] = yi;
846 a[k1] = xr;
847 a[k1 + 1] = xi;
848 j1 += m2;
849 k1 += 2 * m2;
850 xr = a[j1];
851 xi = -a[j1 + 1];
852 yr = a[k1];
853 yi = -a[k1 + 1];
854 a[j1] = yr;
855 a[j1 + 1] = yi;
856 a[k1] = xr;
857 a[k1 + 1] = xi;
858 }
859 k1 = 2 * k + ip[k];
860 a[k1 + 1] = -a[k1 + 1];
861 j1 = k1 + m2;
862 k1 = j1 + m2;
863 xr = a[j1];
864 xi = -a[j1 + 1];
865 yr = a[k1];
866 yi = -a[k1 + 1];
867 a[j1] = yr;
868 a[j1 + 1] = yi;
869 a[k1] = xr;
870 a[k1 + 1] = xi;
871 k1 += m2;
872 a[k1 + 1] = -a[k1 + 1];
873 }
874 } else {
875 a[1] = -a[1];
876 a[m2 + 1] = -a[m2 + 1];
877 for (k = 1; k < m; k++) {
878 for (j = 0; j < k; j++) {
879 j1 = 2 * j + ip[k];
880 k1 = 2 * k + ip[j];
881 xr = a[j1];
882 xi = -a[j1 + 1];
883 yr = a[k1];
884 yi = -a[k1 + 1];
885 a[j1] = yr;
886 a[j1 + 1] = yi;
887 a[k1] = xr;
888 a[k1 + 1] = xi;
889 j1 += m2;
890 k1 += m2;
891 xr = a[j1];
892 xi = -a[j1 + 1];
893 yr = a[k1];
894 yi = -a[k1 + 1];
895 a[j1] = yr;
896 a[j1 + 1] = yi;
897 a[k1] = xr;
898 a[k1 + 1] = xi;
899 }
900 k1 = 2 * k + ip[k];
901 a[k1 + 1] = -a[k1 + 1];
902 a[k1 + m2 + 1] = -a[k1 + m2 + 1];
903 }
904 }
905 }
906 #endif
907
cftfsub(size_t n,float * a,float * w)908 static void cftfsub(size_t n, float *a, float *w)
909 {
910 size_t j, j1, j2, j3, l;
911 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
912
913 l = 2;
914 if (n > 8) {
915 cft1st(n, a, w);
916 l = 8;
917 while ((l << 2) < n) {
918 cftmdl(n, l, a, w);
919 l <<= 2;
920 }
921 }
922 if ((l << 2) == n) {
923 for (j = 0; j < l; j += 2) {
924 j1 = j + l;
925 j2 = j1 + l;
926 j3 = j2 + l;
927 x0r = a[j] + a[j1];
928 x0i = a[j + 1] + a[j1 + 1];
929 x1r = a[j] - a[j1];
930 x1i = a[j + 1] - a[j1 + 1];
931 x2r = a[j2] + a[j3];
932 x2i = a[j2 + 1] + a[j3 + 1];
933 x3r = a[j2] - a[j3];
934 x3i = a[j2 + 1] - a[j3 + 1];
935 a[j] = x0r + x2r;
936 a[j + 1] = x0i + x2i;
937 a[j2] = x0r - x2r;
938 a[j2 + 1] = x0i - x2i;
939 a[j1] = x1r - x3i;
940 a[j1 + 1] = x1i + x3r;
941 a[j3] = x1r + x3i;
942 a[j3 + 1] = x1i - x3r;
943 }
944 } else {
945 for (j = 0; j < l; j += 2) {
946 j1 = j + l;
947 x0r = a[j] - a[j1];
948 x0i = a[j + 1] - a[j1 + 1];
949 a[j] += a[j1];
950 a[j + 1] += a[j1 + 1];
951 a[j1] = x0r;
952 a[j1 + 1] = x0i;
953 }
954 }
955 }
956
957
cftbsub(size_t n,float * a,float * w)958 static void cftbsub(size_t n, float *a, float *w)
959 {
960 size_t j, j1, j2, j3, l;
961 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
962
963 l = 2;
964 if (n > 8) {
965 cft1st(n, a, w);
966 l = 8;
967 while ((l << 2) < n) {
968 cftmdl(n, l, a, w);
969 l <<= 2;
970 }
971 }
972 if ((l << 2) == n) {
973 for (j = 0; j < l; j += 2) {
974 j1 = j + l;
975 j2 = j1 + l;
976 j3 = j2 + l;
977 x0r = a[j] + a[j1];
978 x0i = -a[j + 1] - a[j1 + 1];
979 x1r = a[j] - a[j1];
980 x1i = -a[j + 1] + a[j1 + 1];
981 x2r = a[j2] + a[j3];
982 x2i = a[j2 + 1] + a[j3 + 1];
983 x3r = a[j2] - a[j3];
984 x3i = a[j2 + 1] - a[j3 + 1];
985 a[j] = x0r + x2r;
986 a[j + 1] = x0i - x2i;
987 a[j2] = x0r - x2r;
988 a[j2 + 1] = x0i + x2i;
989 a[j1] = x1r - x3i;
990 a[j1 + 1] = x1i - x3r;
991 a[j3] = x1r + x3i;
992 a[j3 + 1] = x1i + x3r;
993 }
994 } else {
995 for (j = 0; j < l; j += 2) {
996 j1 = j + l;
997 x0r = a[j] - a[j1];
998 x0i = -a[j + 1] + a[j1 + 1];
999 a[j] += a[j1];
1000 a[j + 1] = -a[j + 1] - a[j1 + 1];
1001 a[j1] = x0r;
1002 a[j1 + 1] = x0i;
1003 }
1004 }
1005 }
1006
1007
cft1st(size_t n,float * a,float * w)1008 static void cft1st(size_t n, float *a, float *w)
1009 {
1010 size_t j, k1, k2;
1011 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1012 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1013
1014 x0r = a[0] + a[2];
1015 x0i = a[1] + a[3];
1016 x1r = a[0] - a[2];
1017 x1i = a[1] - a[3];
1018 x2r = a[4] + a[6];
1019 x2i = a[5] + a[7];
1020 x3r = a[4] - a[6];
1021 x3i = a[5] - a[7];
1022 a[0] = x0r + x2r;
1023 a[1] = x0i + x2i;
1024 a[4] = x0r - x2r;
1025 a[5] = x0i - x2i;
1026 a[2] = x1r - x3i;
1027 a[3] = x1i + x3r;
1028 a[6] = x1r + x3i;
1029 a[7] = x1i - x3r;
1030 wk1r = w[2];
1031 x0r = a[8] + a[10];
1032 x0i = a[9] + a[11];
1033 x1r = a[8] - a[10];
1034 x1i = a[9] - a[11];
1035 x2r = a[12] + a[14];
1036 x2i = a[13] + a[15];
1037 x3r = a[12] - a[14];
1038 x3i = a[13] - a[15];
1039 a[8] = x0r + x2r;
1040 a[9] = x0i + x2i;
1041 a[12] = x2i - x0i;
1042 a[13] = x0r - x2r;
1043 x0r = x1r - x3i;
1044 x0i = x1i + x3r;
1045 a[10] = wk1r * (x0r - x0i);
1046 a[11] = wk1r * (x0r + x0i);
1047 x0r = x3i + x1r;
1048 x0i = x3r - x1i;
1049 a[14] = wk1r * (x0i - x0r);
1050 a[15] = wk1r * (x0i + x0r);
1051 k1 = 0;
1052 for (j = 16; j < n; j += 16) {
1053 k1 += 2;
1054 k2 = 2 * k1;
1055 wk2r = w[k1];
1056 wk2i = w[k1 + 1];
1057 wk1r = w[k2];
1058 wk1i = w[k2 + 1];
1059 wk3r = wk1r - 2 * wk2i * wk1i;
1060 wk3i = 2 * wk2i * wk1r - wk1i;
1061 x0r = a[j] + a[j + 2];
1062 x0i = a[j + 1] + a[j + 3];
1063 x1r = a[j] - a[j + 2];
1064 x1i = a[j + 1] - a[j + 3];
1065 x2r = a[j + 4] + a[j + 6];
1066 x2i = a[j + 5] + a[j + 7];
1067 x3r = a[j + 4] - a[j + 6];
1068 x3i = a[j + 5] - a[j + 7];
1069 a[j] = x0r + x2r;
1070 a[j + 1] = x0i + x2i;
1071 x0r -= x2r;
1072 x0i -= x2i;
1073 a[j + 4] = wk2r * x0r - wk2i * x0i;
1074 a[j + 5] = wk2r * x0i + wk2i * x0r;
1075 x0r = x1r - x3i;
1076 x0i = x1i + x3r;
1077 a[j + 2] = wk1r * x0r - wk1i * x0i;
1078 a[j + 3] = wk1r * x0i + wk1i * x0r;
1079 x0r = x1r + x3i;
1080 x0i = x1i - x3r;
1081 a[j + 6] = wk3r * x0r - wk3i * x0i;
1082 a[j + 7] = wk3r * x0i + wk3i * x0r;
1083 wk1r = w[k2 + 2];
1084 wk1i = w[k2 + 3];
1085 wk3r = wk1r - 2 * wk2r * wk1i;
1086 wk3i = 2 * wk2r * wk1r - wk1i;
1087 x0r = a[j + 8] + a[j + 10];
1088 x0i = a[j + 9] + a[j + 11];
1089 x1r = a[j + 8] - a[j + 10];
1090 x1i = a[j + 9] - a[j + 11];
1091 x2r = a[j + 12] + a[j + 14];
1092 x2i = a[j + 13] + a[j + 15];
1093 x3r = a[j + 12] - a[j + 14];
1094 x3i = a[j + 13] - a[j + 15];
1095 a[j + 8] = x0r + x2r;
1096 a[j + 9] = x0i + x2i;
1097 x0r -= x2r;
1098 x0i -= x2i;
1099 a[j + 12] = -wk2i * x0r - wk2r * x0i;
1100 a[j + 13] = -wk2i * x0i + wk2r * x0r;
1101 x0r = x1r - x3i;
1102 x0i = x1i + x3r;
1103 a[j + 10] = wk1r * x0r - wk1i * x0i;
1104 a[j + 11] = wk1r * x0i + wk1i * x0r;
1105 x0r = x1r + x3i;
1106 x0i = x1i - x3r;
1107 a[j + 14] = wk3r * x0r - wk3i * x0i;
1108 a[j + 15] = wk3r * x0i + wk3i * x0r;
1109 }
1110 }
1111
1112
cftmdl(size_t n,size_t l,float * a,float * w)1113 static void cftmdl(size_t n, size_t l, float *a, float *w)
1114 {
1115 size_t j, j1, j2, j3, k, k1, k2, m, m2;
1116 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1117 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1118
1119 m = l << 2;
1120 for (j = 0; j < l; j += 2) {
1121 j1 = j + l;
1122 j2 = j1 + l;
1123 j3 = j2 + l;
1124 x0r = a[j] + a[j1];
1125 x0i = a[j + 1] + a[j1 + 1];
1126 x1r = a[j] - a[j1];
1127 x1i = a[j + 1] - a[j1 + 1];
1128 x2r = a[j2] + a[j3];
1129 x2i = a[j2 + 1] + a[j3 + 1];
1130 x3r = a[j2] - a[j3];
1131 x3i = a[j2 + 1] - a[j3 + 1];
1132 a[j] = x0r + x2r;
1133 a[j + 1] = x0i + x2i;
1134 a[j2] = x0r - x2r;
1135 a[j2 + 1] = x0i - x2i;
1136 a[j1] = x1r - x3i;
1137 a[j1 + 1] = x1i + x3r;
1138 a[j3] = x1r + x3i;
1139 a[j3 + 1] = x1i - x3r;
1140 }
1141 wk1r = w[2];
1142 for (j = m; j < l + m; j += 2) {
1143 j1 = j + l;
1144 j2 = j1 + l;
1145 j3 = j2 + l;
1146 x0r = a[j] + a[j1];
1147 x0i = a[j + 1] + a[j1 + 1];
1148 x1r = a[j] - a[j1];
1149 x1i = a[j + 1] - a[j1 + 1];
1150 x2r = a[j2] + a[j3];
1151 x2i = a[j2 + 1] + a[j3 + 1];
1152 x3r = a[j2] - a[j3];
1153 x3i = a[j2 + 1] - a[j3 + 1];
1154 a[j] = x0r + x2r;
1155 a[j + 1] = x0i + x2i;
1156 a[j2] = x2i - x0i;
1157 a[j2 + 1] = x0r - x2r;
1158 x0r = x1r - x3i;
1159 x0i = x1i + x3r;
1160 a[j1] = wk1r * (x0r - x0i);
1161 a[j1 + 1] = wk1r * (x0r + x0i);
1162 x0r = x3i + x1r;
1163 x0i = x3r - x1i;
1164 a[j3] = wk1r * (x0i - x0r);
1165 a[j3 + 1] = wk1r * (x0i + x0r);
1166 }
1167 k1 = 0;
1168 m2 = 2 * m;
1169 for (k = m2; k < n; k += m2) {
1170 k1 += 2;
1171 k2 = 2 * k1;
1172 wk2r = w[k1];
1173 wk2i = w[k1 + 1];
1174 wk1r = w[k2];
1175 wk1i = w[k2 + 1];
1176 wk3r = wk1r - 2 * wk2i * wk1i;
1177 wk3i = 2 * wk2i * wk1r - wk1i;
1178 for (j = k; j < l + k; j += 2) {
1179 j1 = j + l;
1180 j2 = j1 + l;
1181 j3 = j2 + l;
1182 x0r = a[j] + a[j1];
1183 x0i = a[j + 1] + a[j1 + 1];
1184 x1r = a[j] - a[j1];
1185 x1i = a[j + 1] - a[j1 + 1];
1186 x2r = a[j2] + a[j3];
1187 x2i = a[j2 + 1] + a[j3 + 1];
1188 x3r = a[j2] - a[j3];
1189 x3i = a[j2 + 1] - a[j3 + 1];
1190 a[j] = x0r + x2r;
1191 a[j + 1] = x0i + x2i;
1192 x0r -= x2r;
1193 x0i -= x2i;
1194 a[j2] = wk2r * x0r - wk2i * x0i;
1195 a[j2 + 1] = wk2r * x0i + wk2i * x0r;
1196 x0r = x1r - x3i;
1197 x0i = x1i + x3r;
1198 a[j1] = wk1r * x0r - wk1i * x0i;
1199 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1200 x0r = x1r + x3i;
1201 x0i = x1i - x3r;
1202 a[j3] = wk3r * x0r - wk3i * x0i;
1203 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1204 }
1205 wk1r = w[k2 + 2];
1206 wk1i = w[k2 + 3];
1207 wk3r = wk1r - 2 * wk2r * wk1i;
1208 wk3i = 2 * wk2r * wk1r - wk1i;
1209 for (j = k + m; j < l + (k + m); j += 2) {
1210 j1 = j + l;
1211 j2 = j1 + l;
1212 j3 = j2 + l;
1213 x0r = a[j] + a[j1];
1214 x0i = a[j + 1] + a[j1 + 1];
1215 x1r = a[j] - a[j1];
1216 x1i = a[j + 1] - a[j1 + 1];
1217 x2r = a[j2] + a[j3];
1218 x2i = a[j2 + 1] + a[j3 + 1];
1219 x3r = a[j2] - a[j3];
1220 x3i = a[j2 + 1] - a[j3 + 1];
1221 a[j] = x0r + x2r;
1222 a[j + 1] = x0i + x2i;
1223 x0r -= x2r;
1224 x0i -= x2i;
1225 a[j2] = -wk2i * x0r - wk2r * x0i;
1226 a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
1227 x0r = x1r - x3i;
1228 x0i = x1i + x3r;
1229 a[j1] = wk1r * x0r - wk1i * x0i;
1230 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1231 x0r = x1r + x3i;
1232 x0i = x1i - x3r;
1233 a[j3] = wk3r * x0r - wk3i * x0i;
1234 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1235 }
1236 }
1237 }
1238
1239
rftfsub(size_t n,float * a,size_t nc,float * c)1240 static void rftfsub(size_t n, float *a, size_t nc, float *c)
1241 {
1242 size_t j, k, kk, ks, m;
1243 float wkr, wki, xr, xi, yr, yi;
1244
1245 m = n >> 1;
1246 ks = 2 * nc / m;
1247 kk = 0;
1248 for (j = 2; j < m; j += 2) {
1249 k = n - j;
1250 kk += ks;
1251 wkr = 0.5f - c[nc - kk];
1252 wki = c[kk];
1253 xr = a[j] - a[k];
1254 xi = a[j + 1] + a[k + 1];
1255 yr = wkr * xr - wki * xi;
1256 yi = wkr * xi + wki * xr;
1257 a[j] -= yr;
1258 a[j + 1] -= yi;
1259 a[k] += yr;
1260 a[k + 1] -= yi;
1261 }
1262 }
1263
1264
rftbsub(size_t n,float * a,size_t nc,float * c)1265 static void rftbsub(size_t n, float *a, size_t nc, float *c)
1266 {
1267 size_t j, k, kk, ks, m;
1268 float wkr, wki, xr, xi, yr, yi;
1269
1270 a[1] = -a[1];
1271 m = n >> 1;
1272 ks = 2 * nc / m;
1273 kk = 0;
1274 for (j = 2; j < m; j += 2) {
1275 k = n - j;
1276 kk += ks;
1277 wkr = 0.5f - c[nc - kk];
1278 wki = c[kk];
1279 xr = a[j] - a[k];
1280 xi = a[j + 1] + a[k + 1];
1281 yr = wkr * xr + wki * xi;
1282 yi = wkr * xi - wki * xr;
1283 a[j] -= yr;
1284 a[j + 1] = yi - a[j + 1];
1285 a[k] += yr;
1286 a[k + 1] = yi - a[k + 1];
1287 }
1288 a[m + 1] = -a[m + 1];
1289 }
1290
1291 #if 0 // Not used.
1292 static void dctsub(int n, float *a, int nc, float *c)
1293 {
1294 int j, k, kk, ks, m;
1295 float wkr, wki, xr;
1296
1297 m = n >> 1;
1298 ks = nc / n;
1299 kk = 0;
1300 for (j = 1; j < m; j++) {
1301 k = n - j;
1302 kk += ks;
1303 wkr = c[kk] - c[nc - kk];
1304 wki = c[kk] + c[nc - kk];
1305 xr = wki * a[j] - wkr * a[k];
1306 a[j] = wkr * a[j] + wki * a[k];
1307 a[k] = xr;
1308 }
1309 a[m] *= c[0];
1310 }
1311
1312
1313 static void dstsub(int n, float *a, int nc, float *c)
1314 {
1315 int j, k, kk, ks, m;
1316 float wkr, wki, xr;
1317
1318 m = n >> 1;
1319 ks = nc / n;
1320 kk = 0;
1321 for (j = 1; j < m; j++) {
1322 k = n - j;
1323 kk += ks;
1324 wkr = c[kk] - c[nc - kk];
1325 wki = c[kk] + c[nc - kk];
1326 xr = wki * a[k] - wkr * a[j];
1327 a[k] = wkr * a[k] + wki * a[j];
1328 a[j] = xr;
1329 }
1330 a[m] *= c[0];
1331 }
1332 #endif // Not used.
1333