• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 Fast Fourier/Cosine/Sine Transform
3     dimension   :two
4     data length :power of 2
5     decimation  :frequency
6     radix       :split-radix, row-column
7     data        :inplace
8     table       :use
9 functions
10     cdft2d: Complex Discrete Fourier Transform
11     rdft2d: Real Discrete Fourier Transform
12     ddct2d: Discrete Cosine Transform
13     ddst2d: Discrete Sine Transform
14 function prototypes
15     void cdft2d(int, int, int, double **, double *, int *, double *);
16     void rdft2d(int, int, int, double **, double *, int *, double *);
17     void rdft2dsort(int, int, int, double **);
18     void ddct2d(int, int, int, double **, double *, int *, double *);
19     void ddst2d(int, int, int, double **, double *, int *, double *);
20 necessary package
21     fftsg.c  : 1D-FFT package
22 macro definitions
23     USE_FFT2D_PTHREADS : default=not defined
24         FFT2D_MAX_THREADS     : must be 2^N, default=4
25         FFT2D_THREADS_BEGIN_N : default=65536
26     USE_FFT2D_WINTHREADS : default=not defined
27         FFT2D_MAX_THREADS     : must be 2^N, default=4
28         FFT2D_THREADS_BEGIN_N : default=131072
29 
30 
31 -------- Complex DFT (Discrete Fourier Transform) --------
32     [definition]
33         <case1>
34             X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] *
35                             exp(2*pi*i*j1*k1/n1) *
36                             exp(2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2
37         <case2>
38             X[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 x[j1][j2] *
39                             exp(-2*pi*i*j1*k1/n1) *
40                             exp(-2*pi*i*j2*k2/n2), 0<=k1<n1, 0<=k2<n2
41         (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
42     [usage]
43         <case1>
44             ip[0] = 0; // first time only
45             cdft2d(n1, 2*n2, 1, a, t, ip, w);
46         <case2>
47             ip[0] = 0; // first time only
48             cdft2d(n1, 2*n2, -1, a, t, ip, w);
49     [parameters]
50         n1     :data length (int)
51                 n1 >= 1, n1 = power of 2
52         2*n2   :data length (int)
53                 n2 >= 1, n2 = power of 2
54         a[0...n1-1][0...2*n2-1]
55                :input/output data (double **)
56                 input data
57                     a[j1][2*j2] = Re(x[j1][j2]),
58                     a[j1][2*j2+1] = Im(x[j1][j2]),
59                     0<=j1<n1, 0<=j2<n2
60                 output data
61                     a[k1][2*k2] = Re(X[k1][k2]),
62                     a[k1][2*k2+1] = Im(X[k1][k2]),
63                     0<=k1<n1, 0<=k2<n2
64         t[0...*]
65                :work area (double *)
66                 length of t >= 8*n1,                   if single thread,
67                 length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads,
68                 t is dynamically allocated, if t == NULL.
69         ip[0...*]
70                :work area for bit reversal (int *)
71                 length of ip >= 2+sqrt(n)
72                 (n = max(n1, n2))
73                 ip[0],ip[1] are pointers of the cos/sin table.
74         w[0...*]
75                :cos/sin table (double *)
76                 length of w >= max(n1/2, n2/2)
77                 w[],ip[] are initialized if ip[0] == 0.
78     [remark]
79         Inverse of
80             cdft2d(n1, 2*n2, -1, a, t, ip, w);
81         is
82             cdft2d(n1, 2*n2, 1, a, t, ip, w);
83             for (j1 = 0; j1 <= n1 - 1; j1++) {
84                 for (j2 = 0; j2 <= 2 * n2 - 1; j2++) {
85                     a[j1][j2] *= 1.0 / n1 / n2;
86                 }
87             }
88         .
89 
90 
91 -------- Real DFT / Inverse of Real DFT --------
92     [definition]
93         <case1> RDFT
94             R[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
95                             cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2),
96                             0<=k1<n1, 0<=k2<n2
97             I[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
98                             sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2),
99                             0<=k1<n1, 0<=k2<n2
100         <case2> IRDFT (excluding scale)
101             a[k1][k2] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1
102                             (R[j1][j2] *
103                             cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2) +
104                             I[j1][j2] *
105                             sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2)),
106                             0<=k1<n1, 0<=k2<n2
107         (notes: R[n1-k1][n2-k2] = R[k1][k2],
108                 I[n1-k1][n2-k2] = -I[k1][k2],
109                 R[n1-k1][0] = R[k1][0],
110                 I[n1-k1][0] = -I[k1][0],
111                 R[0][n2-k2] = R[0][k2],
112                 I[0][n2-k2] = -I[0][k2],
113                 0<k1<n1, 0<k2<n2)
114     [usage]
115         <case1>
116             ip[0] = 0; // first time only
117             rdft2d(n1, n2, 1, a, t, ip, w);
118         <case2>
119             ip[0] = 0; // first time only
120             rdft2d(n1, n2, -1, a, t, ip, w);
121     [parameters]
122         n1     :data length (int)
123                 n1 >= 2, n1 = power of 2
124         n2     :data length (int)
125                 n2 >= 2, n2 = power of 2
126         a[0...n1-1][0...n2-1]
127                :input/output data (double **)
128                 <case1>
129                     output data
130                         a[k1][2*k2] = R[k1][k2] = R[n1-k1][n2-k2],
131                         a[k1][2*k2+1] = I[k1][k2] = -I[n1-k1][n2-k2],
132                             0<k1<n1, 0<k2<n2/2,
133                         a[0][2*k2] = R[0][k2] = R[0][n2-k2],
134                         a[0][2*k2+1] = I[0][k2] = -I[0][n2-k2],
135                             0<k2<n2/2,
136                         a[k1][0] = R[k1][0] = R[n1-k1][0],
137                         a[k1][1] = I[k1][0] = -I[n1-k1][0],
138                         a[n1-k1][1] = R[k1][n2/2] = R[n1-k1][n2/2],
139                         a[n1-k1][0] = -I[k1][n2/2] = I[n1-k1][n2/2],
140                             0<k1<n1/2,
141                         a[0][0] = R[0][0],
142                         a[0][1] = R[0][n2/2],
143                         a[n1/2][0] = R[n1/2][0],
144                         a[n1/2][1] = R[n1/2][n2/2]
145                 <case2>
146                     input data
147                         a[j1][2*j2] = R[j1][j2] = R[n1-j1][n2-j2],
148                         a[j1][2*j2+1] = I[j1][j2] = -I[n1-j1][n2-j2],
149                             0<j1<n1, 0<j2<n2/2,
150                         a[0][2*j2] = R[0][j2] = R[0][n2-j2],
151                         a[0][2*j2+1] = I[0][j2] = -I[0][n2-j2],
152                             0<j2<n2/2,
153                         a[j1][0] = R[j1][0] = R[n1-j1][0],
154                         a[j1][1] = I[j1][0] = -I[n1-j1][0],
155                         a[n1-j1][1] = R[j1][n2/2] = R[n1-j1][n2/2],
156                         a[n1-j1][0] = -I[j1][n2/2] = I[n1-j1][n2/2],
157                             0<j1<n1/2,
158                         a[0][0] = R[0][0],
159                         a[0][1] = R[0][n2/2],
160                         a[n1/2][0] = R[n1/2][0],
161                         a[n1/2][1] = R[n1/2][n2/2]
162                 ---- output ordering ----
163                     rdft2d(n1, n2, 1, a, t, ip, w);
164                     rdft2dsort(n1, n2, 1, a);
165                     // stored data is a[0...n1-1][0...n2+1]:
166                     // a[k1][2*k2] = R[k1][k2],
167                     // a[k1][2*k2+1] = I[k1][k2],
168                     // 0<=k1<n1, 0<=k2<=n2/2.
169                     // the stored data is larger than the input data!
170                 ---- input ordering ----
171                     rdft2dsort(n1, n2, -1, a);
172                     rdft2d(n1, n2, -1, a, t, ip, w);
173         t[0...*]
174                :work area (double *)
175                 length of t >= 8*n1,                   if single thread,
176                 length of t >= 8*n1*FFT2D_MAX_THREADS, if multi threads,
177                 t is dynamically allocated, if t == NULL.
178         ip[0...*]
179                :work area for bit reversal (int *)
180                 length of ip >= 2+sqrt(n)
181                 (n = max(n1, n2/2))
182                 ip[0],ip[1] are pointers of the cos/sin table.
183         w[0...*]
184                :cos/sin table (double *)
185                 length of w >= max(n1/2, n2/4) + n2/4
186                 w[],ip[] are initialized if ip[0] == 0.
187     [remark]
188         Inverse of
189             rdft2d(n1, n2, 1, a, t, ip, w);
190         is
191             rdft2d(n1, n2, -1, a, t, ip, w);
192             for (j1 = 0; j1 <= n1 - 1; j1++) {
193                 for (j2 = 0; j2 <= n2 - 1; j2++) {
194                     a[j1][j2] *= 2.0 / n1 / n2;
195                 }
196             }
197         .
198 
199 
200 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
201     [definition]
202         <case1> IDCT (excluding scale)
203             C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
204                             cos(pi*j1*(k1+1/2)/n1) *
205                             cos(pi*j2*(k2+1/2)/n2),
206                             0<=k1<n1, 0<=k2<n2
207         <case2> DCT
208             C[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
209                             cos(pi*(j1+1/2)*k1/n1) *
210                             cos(pi*(j2+1/2)*k2/n2),
211                             0<=k1<n1, 0<=k2<n2
212     [usage]
213         <case1>
214             ip[0] = 0; // first time only
215             ddct2d(n1, n2, 1, a, t, ip, w);
216         <case2>
217             ip[0] = 0; // first time only
218             ddct2d(n1, n2, -1, a, t, ip, w);
219     [parameters]
220         n1     :data length (int)
221                 n1 >= 2, n1 = power of 2
222         n2     :data length (int)
223                 n2 >= 2, n2 = power of 2
224         a[0...n1-1][0...n2-1]
225                :input/output data (double **)
226                 output data
227                     a[k1][k2] = C[k1][k2], 0<=k1<n1, 0<=k2<n2
228         t[0...*]
229                :work area (double *)
230                 length of t >= 4*n1,                   if single thread,
231                 length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads,
232                 t is dynamically allocated, if t == NULL.
233         ip[0...*]
234                :work area for bit reversal (int *)
235                 length of ip >= 2+sqrt(n)
236                 (n = max(n1/2, n2/2))
237                 ip[0],ip[1] are pointers of the cos/sin table.
238         w[0...*]
239                :cos/sin table (double *)
240                 length of w >= max(n1*3/2, n2*3/2)
241                 w[],ip[] are initialized if ip[0] == 0.
242     [remark]
243         Inverse of
244             ddct2d(n1, n2, -1, a, t, ip, w);
245         is
246             for (j1 = 0; j1 <= n1 - 1; j1++) {
247                 a[j1][0] *= 0.5;
248             }
249             for (j2 = 0; j2 <= n2 - 1; j2++) {
250                 a[0][j2] *= 0.5;
251             }
252             ddct2d(n1, n2, 1, a, t, ip, w);
253             for (j1 = 0; j1 <= n1 - 1; j1++) {
254                 for (j2 = 0; j2 <= n2 - 1; j2++) {
255                     a[j1][j2] *= 4.0 / n1 / n2;
256                 }
257             }
258         .
259 
260 
261 -------- DST (Discrete Sine Transform) / Inverse of DST --------
262     [definition]
263         <case1> IDST (excluding scale)
264             S[k1][k2] = sum_j1=1^n1 sum_j2=1^n2 A[j1][j2] *
265                             sin(pi*j1*(k1+1/2)/n1) *
266                             sin(pi*j2*(k2+1/2)/n2),
267                             0<=k1<n1, 0<=k2<n2
268         <case2> DST
269             S[k1][k2] = sum_j1=0^n1-1 sum_j2=0^n2-1 a[j1][j2] *
270                             sin(pi*(j1+1/2)*k1/n1) *
271                             sin(pi*(j2+1/2)*k2/n2),
272                             0<k1<=n1, 0<k2<=n2
273     [usage]
274         <case1>
275             ip[0] = 0; // first time only
276             ddst2d(n1, n2, 1, a, t, ip, w);
277         <case2>
278             ip[0] = 0; // first time only
279             ddst2d(n1, n2, -1, a, t, ip, w);
280     [parameters]
281         n1     :data length (int)
282                 n1 >= 2, n1 = power of 2
283         n2     :data length (int)
284                 n2 >= 2, n2 = power of 2
285         a[0...n1-1][0...n2-1]
286                :input/output data (double **)
287                 <case1>
288                     input data
289                         a[j1][j2] = A[j1][j2], 0<j1<n1, 0<j2<n2,
290                         a[j1][0] = A[j1][n2], 0<j1<n1,
291                         a[0][j2] = A[n1][j2], 0<j2<n2,
292                         a[0][0] = A[n1][n2]
293                         (i.e. A[j1][j2] = a[j1 % n1][j2 % n2])
294                     output data
295                         a[k1][k2] = S[k1][k2], 0<=k1<n1, 0<=k2<n2
296                 <case2>
297                     output data
298                         a[k1][k2] = S[k1][k2], 0<k1<n1, 0<k2<n2,
299                         a[k1][0] = S[k1][n2], 0<k1<n1,
300                         a[0][k2] = S[n1][k2], 0<k2<n2,
301                         a[0][0] = S[n1][n2]
302                         (i.e. S[k1][k2] = a[k1 % n1][k2 % n2])
303         t[0...*]
304                :work area (double *)
305                 length of t >= 4*n1,                   if single thread,
306                 length of t >= 4*n1*FFT2D_MAX_THREADS, if multi threads,
307                 t is dynamically allocated, if t == NULL.
308         ip[0...*]
309                :work area for bit reversal (int *)
310                 length of ip >= 2+sqrt(n)
311                 (n = max(n1/2, n2/2))
312                 ip[0],ip[1] are pointers of the cos/sin table.
313         w[0...*]
314                :cos/sin table (double *)
315                 length of w >= max(n1*3/2, n2*3/2)
316                 w[],ip[] are initialized if ip[0] == 0.
317     [remark]
318         Inverse of
319             ddst2d(n1, n2, -1, a, t, ip, w);
320         is
321             for (j1 = 0; j1 <= n1 - 1; j1++) {
322                 a[j1][0] *= 0.5;
323             }
324             for (j2 = 0; j2 <= n2 - 1; j2++) {
325                 a[0][j2] *= 0.5;
326             }
327             ddst2d(n1, n2, 1, a, t, ip, w);
328             for (j1 = 0; j1 <= n1 - 1; j1++) {
329                 for (j2 = 0; j2 <= n2 - 1; j2++) {
330                     a[j1][j2] *= 4.0 / n1 / n2;
331                 }
332             }
333         .
334 */
335 
336 
337 #include <stdio.h>
338 #include <stdlib.h>
339 #define fft2d_alloc_error_check(p) { \
340     if ((p) == NULL) { \
341         fprintf(stderr, "fft2d memory allocation error\n"); \
342         exit(1); \
343     } \
344 }
345 
346 
347 #ifdef USE_FFT2D_PTHREADS
348 #define USE_FFT2D_THREADS
349 #ifndef FFT2D_MAX_THREADS
350 #define FFT2D_MAX_THREADS 4
351 #endif
352 #ifndef FFT2D_THREADS_BEGIN_N
353 #define FFT2D_THREADS_BEGIN_N 65536
354 #endif
355 #include <pthread.h>
356 #define fft2d_thread_t pthread_t
357 #define fft2d_thread_create(thp,func,argp) { \
358     if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \
359         fprintf(stderr, "fft2d thread error\n"); \
360         exit(1); \
361     } \
362 }
363 #define fft2d_thread_wait(th) { \
364     if (pthread_join(th, NULL) != 0) { \
365         fprintf(stderr, "fft2d thread error\n"); \
366         exit(1); \
367     } \
368 }
369 #endif /* USE_FFT2D_PTHREADS */
370 
371 
372 #ifdef USE_FFT2D_WINTHREADS
373 #define USE_FFT2D_THREADS
374 #ifndef FFT2D_MAX_THREADS
375 #define FFT2D_MAX_THREADS 4
376 #endif
377 #ifndef FFT2D_THREADS_BEGIN_N
378 #define FFT2D_THREADS_BEGIN_N 131072
379 #endif
380 #include <windows.h>
381 #define fft2d_thread_t HANDLE
382 #define fft2d_thread_create(thp,func,argp) { \
383     DWORD thid; \
384     *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \
385     if (*(thp) == 0) { \
386         fprintf(stderr, "fft2d thread error\n"); \
387         exit(1); \
388     } \
389 }
390 #define fft2d_thread_wait(th) { \
391     WaitForSingleObject(th, INFINITE); \
392     CloseHandle(th); \
393 }
394 #endif /* USE_FFT2D_WINTHREADS */
395 
396 
cdft2d(int n1,int n2,int isgn,double ** a,double * t,int * ip,double * w)397 void cdft2d(int n1, int n2, int isgn, double **a, double *t,
398     int *ip, double *w)
399 {
400     void makewt(int nw, int *ip, double *w);
401     void cdft(int n, int isgn, double *a, int *ip, double *w);
402     void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t,
403         int *ip, double *w);
404 #ifdef USE_FFT2D_THREADS
405     void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a,
406         int *ip, double *w);
407     void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t,
408         int *ip, double *w);
409 #endif /* USE_FFT2D_THREADS */
410     int n, itnull, nthread, nt, i;
411 
412     n = n1 << 1;
413     if (n < n2) {
414         n = n2;
415     }
416     if (n > (ip[0] << 2)) {
417         makewt(n >> 2, ip, w);
418     }
419     itnull = 0;
420     if (t == NULL) {
421         itnull = 1;
422         nthread = 1;
423 #ifdef USE_FFT2D_THREADS
424         nthread = FFT2D_MAX_THREADS;
425 #endif /* USE_FFT2D_THREADS */
426         nt = 8 * nthread * n1;
427         if (n2 == 4 * nthread) {
428             nt >>= 1;
429         } else if (n2 < 4 * nthread) {
430             nt >>= 2;
431         }
432         t = (double *) malloc(sizeof(double) * nt);
433         fft2d_alloc_error_check(t);
434     }
435 #ifdef USE_FFT2D_THREADS
436     if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) {
437         xdft2d0_subth(n1, n2, 0, isgn, a, ip, w);
438         cdft2d_subth(n1, n2, isgn, a, t, ip, w);
439     } else
440 #endif /* USE_FFT2D_THREADS */
441     {
442         for (i = 0; i < n1; i++) {
443             cdft(n2, isgn, a[i], ip, w);
444         }
445         cdft2d_sub(n1, n2, isgn, a, t, ip, w);
446     }
447     if (itnull != 0) {
448         free(t);
449     }
450 }
451 
452 
rdft2d(int n1,int n2,int isgn,double ** a,double * t,int * ip,double * w)453 void rdft2d(int n1, int n2, int isgn, double **a, double *t,
454     int *ip, double *w)
455 {
456     void makewt(int nw, int *ip, double *w);
457     void makect(int nc, int *ip, double *c);
458     void rdft(int n, int isgn, double *a, int *ip, double *w);
459     void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t,
460         int *ip, double *w);
461     void rdft2d_sub(int n1, int n2, int isgn, double **a);
462 #ifdef USE_FFT2D_THREADS
463     void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a,
464         int *ip, double *w);
465     void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t,
466         int *ip, double *w);
467 #endif /* USE_FFT2D_THREADS */
468     int n, nw, nc, itnull, nthread, nt, i;
469 
470     n = n1 << 1;
471     if (n < n2) {
472         n = n2;
473     }
474     nw = ip[0];
475     if (n > (nw << 2)) {
476         nw = n >> 2;
477         makewt(nw, ip, w);
478     }
479     nc = ip[1];
480     if (n2 > (nc << 2)) {
481         nc = n2 >> 2;
482         makect(nc, ip, w + nw);
483     }
484     itnull = 0;
485     if (t == NULL) {
486         itnull = 1;
487         nthread = 1;
488 #ifdef USE_FFT2D_THREADS
489         nthread = FFT2D_MAX_THREADS;
490 #endif /* USE_FFT2D_THREADS */
491         nt = 8 * nthread * n1;
492         if (n2 == 4 * nthread) {
493             nt >>= 1;
494         } else if (n2 < 4 * nthread) {
495             nt >>= 2;
496         }
497         t = (double *) malloc(sizeof(double) * nt);
498         fft2d_alloc_error_check(t);
499     }
500 #ifdef USE_FFT2D_THREADS
501     if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) {
502         if (isgn < 0) {
503             rdft2d_sub(n1, n2, isgn, a);
504             cdft2d_subth(n1, n2, isgn, a, t, ip, w);
505         }
506         xdft2d0_subth(n1, n2, 1, isgn, a, ip, w);
507         if (isgn >= 0) {
508             cdft2d_subth(n1, n2, isgn, a, t, ip, w);
509             rdft2d_sub(n1, n2, isgn, a);
510         }
511     } else
512 #endif /* USE_FFT2D_THREADS */
513     {
514         if (isgn < 0) {
515             rdft2d_sub(n1, n2, isgn, a);
516             cdft2d_sub(n1, n2, isgn, a, t, ip, w);
517         }
518         for (i = 0; i < n1; i++) {
519             rdft(n2, isgn, a[i], ip, w);
520         }
521         if (isgn >= 0) {
522             cdft2d_sub(n1, n2, isgn, a, t, ip, w);
523             rdft2d_sub(n1, n2, isgn, a);
524         }
525     }
526     if (itnull != 0) {
527         free(t);
528     }
529 }
530 
531 
rdft2dsort(int n1,int n2,int isgn,double ** a)532 void rdft2dsort(int n1, int n2, int isgn, double **a)
533 {
534     int n1h, i;
535     double x, y;
536 
537     n1h = n1 >> 1;
538     if (isgn < 0) {
539         for (i = n1h + 1; i < n1; i++) {
540             a[i][0] = a[i][n2 + 1];
541             a[i][1] = a[i][n2];
542         }
543         a[0][1] = a[0][n2];
544         a[n1h][1] = a[n1h][n2];
545     } else {
546         for (i = n1h + 1; i < n1; i++) {
547             y = a[i][0];
548             x = a[i][1];
549             a[i][n2] = x;
550             a[i][n2 + 1] = y;
551             a[n1 - i][n2] = x;
552             a[n1 - i][n2 + 1] = -y;
553             a[i][0] = a[n1 - i][0];
554             a[i][1] = -a[n1 - i][1];
555         }
556         a[0][n2] = a[0][1];
557         a[0][n2 + 1] = 0;
558         a[0][1] = 0;
559         a[n1h][n2] = a[n1h][1];
560         a[n1h][n2 + 1] = 0;
561         a[n1h][1] = 0;
562     }
563 }
564 
565 
ddct2d(int n1,int n2,int isgn,double ** a,double * t,int * ip,double * w)566 void ddct2d(int n1, int n2, int isgn, double **a, double *t,
567     int *ip, double *w)
568 {
569     void makewt(int nw, int *ip, double *w);
570     void makect(int nc, int *ip, double *c);
571     void ddct(int n, int isgn, double *a, int *ip, double *w);
572     void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a,
573         double *t, int *ip, double *w);
574 #ifdef USE_FFT2D_THREADS
575     void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a,
576         int *ip, double *w);
577     void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a,
578         double *t, int *ip, double *w);
579 #endif /* USE_FFT2D_THREADS */
580     int n, nw, nc, itnull, nthread, nt, i;
581 
582     n = n1;
583     if (n < n2) {
584         n = n2;
585     }
586     nw = ip[0];
587     if (n > (nw << 2)) {
588         nw = n >> 2;
589         makewt(nw, ip, w);
590     }
591     nc = ip[1];
592     if (n > nc) {
593         nc = n;
594         makect(nc, ip, w + nw);
595     }
596     itnull = 0;
597     if (t == NULL) {
598         itnull = 1;
599         nthread = 1;
600 #ifdef USE_FFT2D_THREADS
601         nthread = FFT2D_MAX_THREADS;
602 #endif /* USE_FFT2D_THREADS */
603         nt = 4 * nthread * n1;
604         if (n2 == 2 * nthread) {
605             nt >>= 1;
606         } else if (n2 < 2 * nthread) {
607             nt >>= 2;
608         }
609         t = (double *) malloc(sizeof(double) * nt);
610         fft2d_alloc_error_check(t);
611     }
612 #ifdef USE_FFT2D_THREADS
613     if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) {
614         ddxt2d0_subth(n1, n2, 0, isgn, a, ip, w);
615         ddxt2d_subth(n1, n2, 0, isgn, a, t, ip, w);
616     } else
617 #endif /* USE_FFT2D_THREADS */
618     {
619         for (i = 0; i < n1; i++) {
620             ddct(n2, isgn, a[i], ip, w);
621         }
622         ddxt2d_sub(n1, n2, 0, isgn, a, t, ip, w);
623     }
624     if (itnull != 0) {
625         free(t);
626     }
627 }
628 
629 
ddst2d(int n1,int n2,int isgn,double ** a,double * t,int * ip,double * w)630 void ddst2d(int n1, int n2, int isgn, double **a, double *t,
631     int *ip, double *w)
632 {
633     void makewt(int nw, int *ip, double *w);
634     void makect(int nc, int *ip, double *c);
635     void ddst(int n, int isgn, double *a, int *ip, double *w);
636     void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a,
637         double *t, int *ip, double *w);
638 #ifdef USE_FFT2D_THREADS
639     void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a,
640         int *ip, double *w);
641     void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a,
642         double *t, int *ip, double *w);
643 #endif /* USE_FFT2D_THREADS */
644     int n, nw, nc, itnull, nthread, nt, i;
645 
646     n = n1;
647     if (n < n2) {
648         n = n2;
649     }
650     nw = ip[0];
651     if (n > (nw << 2)) {
652         nw = n >> 2;
653         makewt(nw, ip, w);
654     }
655     nc = ip[1];
656     if (n > nc) {
657         nc = n;
658         makect(nc, ip, w + nw);
659     }
660     itnull = 0;
661     if (t == NULL) {
662         itnull = 1;
663         nthread = 1;
664 #ifdef USE_FFT2D_THREADS
665         nthread = FFT2D_MAX_THREADS;
666 #endif /* USE_FFT2D_THREADS */
667         nt = 4 * nthread * n1;
668         if (n2 == 2 * nthread) {
669             nt >>= 1;
670         } else if (n2 < 2 * nthread) {
671             nt >>= 2;
672         }
673         t = (double *) malloc(sizeof(double) * nt);
674         fft2d_alloc_error_check(t);
675     }
676 #ifdef USE_FFT2D_THREADS
677     if ((double) n1 * n2 >= (double) FFT2D_THREADS_BEGIN_N) {
678         ddxt2d0_subth(n1, n2, 1, isgn, a, ip, w);
679         ddxt2d_subth(n1, n2, 1, isgn, a, t, ip, w);
680     } else
681 #endif /* USE_FFT2D_THREADS */
682     {
683         for (i = 0; i < n1; i++) {
684             ddst(n2, isgn, a[i], ip, w);
685         }
686         ddxt2d_sub(n1, n2, 1, isgn, a, t, ip, w);
687     }
688     if (itnull != 0) {
689         free(t);
690     }
691 }
692 
693 
694 /* -------- child routines -------- */
695 
696 
cdft2d_sub(int n1,int n2,int isgn,double ** a,double * t,int * ip,double * w)697 void cdft2d_sub(int n1, int n2, int isgn, double **a, double *t,
698     int *ip, double *w)
699 {
700     void cdft(int n, int isgn, double *a, int *ip, double *w);
701     int i, j;
702 
703     if (n2 > 4) {
704         for (j = 0; j < n2; j += 8) {
705             for (i = 0; i < n1; i++) {
706                 t[2 * i] = a[i][j];
707                 t[2 * i + 1] = a[i][j + 1];
708                 t[2 * n1 + 2 * i] = a[i][j + 2];
709                 t[2 * n1 + 2 * i + 1] = a[i][j + 3];
710                 t[4 * n1 + 2 * i] = a[i][j + 4];
711                 t[4 * n1 + 2 * i + 1] = a[i][j + 5];
712                 t[6 * n1 + 2 * i] = a[i][j + 6];
713                 t[6 * n1 + 2 * i + 1] = a[i][j + 7];
714             }
715             cdft(2 * n1, isgn, t, ip, w);
716             cdft(2 * n1, isgn, &t[2 * n1], ip, w);
717             cdft(2 * n1, isgn, &t[4 * n1], ip, w);
718             cdft(2 * n1, isgn, &t[6 * n1], ip, w);
719             for (i = 0; i < n1; i++) {
720                 a[i][j] = t[2 * i];
721                 a[i][j + 1] = t[2 * i + 1];
722                 a[i][j + 2] = t[2 * n1 + 2 * i];
723                 a[i][j + 3] = t[2 * n1 + 2 * i + 1];
724                 a[i][j + 4] = t[4 * n1 + 2 * i];
725                 a[i][j + 5] = t[4 * n1 + 2 * i + 1];
726                 a[i][j + 6] = t[6 * n1 + 2 * i];
727                 a[i][j + 7] = t[6 * n1 + 2 * i + 1];
728             }
729         }
730     } else if (n2 == 4) {
731         for (i = 0; i < n1; i++) {
732             t[2 * i] = a[i][0];
733             t[2 * i + 1] = a[i][1];
734             t[2 * n1 + 2 * i] = a[i][2];
735             t[2 * n1 + 2 * i + 1] = a[i][3];
736         }
737         cdft(2 * n1, isgn, t, ip, w);
738         cdft(2 * n1, isgn, &t[2 * n1], ip, w);
739         for (i = 0; i < n1; i++) {
740             a[i][0] = t[2 * i];
741             a[i][1] = t[2 * i + 1];
742             a[i][2] = t[2 * n1 + 2 * i];
743             a[i][3] = t[2 * n1 + 2 * i + 1];
744         }
745     } else if (n2 == 2) {
746         for (i = 0; i < n1; i++) {
747             t[2 * i] = a[i][0];
748             t[2 * i + 1] = a[i][1];
749         }
750         cdft(2 * n1, isgn, t, ip, w);
751         for (i = 0; i < n1; i++) {
752             a[i][0] = t[2 * i];
753             a[i][1] = t[2 * i + 1];
754         }
755     }
756 }
757 
758 
rdft2d_sub(int n1,int n2,int isgn,double ** a)759 void rdft2d_sub(int n1, int n2, int isgn, double **a)
760 {
761     int n1h, i, j;
762     double xi;
763 
764     n1h = n1 >> 1;
765     if (isgn < 0) {
766         for (i = 1; i < n1h; i++) {
767             j = n1 - i;
768             xi = a[i][0] - a[j][0];
769             a[i][0] += a[j][0];
770             a[j][0] = xi;
771             xi = a[j][1] - a[i][1];
772             a[i][1] += a[j][1];
773             a[j][1] = xi;
774         }
775     } else {
776         for (i = 1; i < n1h; i++) {
777             j = n1 - i;
778             a[j][0] = 0.5 * (a[i][0] - a[j][0]);
779             a[i][0] -= a[j][0];
780             a[j][1] = 0.5 * (a[i][1] + a[j][1]);
781             a[i][1] -= a[j][1];
782         }
783     }
784 }
785 
786 
ddxt2d_sub(int n1,int n2,int ics,int isgn,double ** a,double * t,int * ip,double * w)787 void ddxt2d_sub(int n1, int n2, int ics, int isgn, double **a,
788     double *t, int *ip, double *w)
789 {
790     void ddct(int n, int isgn, double *a, int *ip, double *w);
791     void ddst(int n, int isgn, double *a, int *ip, double *w);
792     int i, j;
793 
794     if (n2 > 2) {
795         for (j = 0; j < n2; j += 4) {
796             for (i = 0; i < n1; i++) {
797                 t[i] = a[i][j];
798                 t[n1 + i] = a[i][j + 1];
799                 t[2 * n1 + i] = a[i][j + 2];
800                 t[3 * n1 + i] = a[i][j + 3];
801             }
802             if (ics == 0) {
803                 ddct(n1, isgn, t, ip, w);
804                 ddct(n1, isgn, &t[n1], ip, w);
805                 ddct(n1, isgn, &t[2 * n1], ip, w);
806                 ddct(n1, isgn, &t[3 * n1], ip, w);
807             } else {
808                 ddst(n1, isgn, t, ip, w);
809                 ddst(n1, isgn, &t[n1], ip, w);
810                 ddst(n1, isgn, &t[2 * n1], ip, w);
811                 ddst(n1, isgn, &t[3 * n1], ip, w);
812             }
813             for (i = 0; i < n1; i++) {
814                 a[i][j] = t[i];
815                 a[i][j + 1] = t[n1 + i];
816                 a[i][j + 2] = t[2 * n1 + i];
817                 a[i][j + 3] = t[3 * n1 + i];
818             }
819         }
820     } else if (n2 == 2) {
821         for (i = 0; i < n1; i++) {
822             t[i] = a[i][0];
823             t[n1 + i] = a[i][1];
824         }
825         if (ics == 0) {
826             ddct(n1, isgn, t, ip, w);
827             ddct(n1, isgn, &t[n1], ip, w);
828         } else {
829             ddst(n1, isgn, t, ip, w);
830             ddst(n1, isgn, &t[n1], ip, w);
831         }
832         for (i = 0; i < n1; i++) {
833             a[i][0] = t[i];
834             a[i][1] = t[n1 + i];
835         }
836     }
837 }
838 
839 
840 #ifdef USE_FFT2D_THREADS
841 struct fft2d_arg_st {
842     int nthread;
843     int n0;
844     int n1;
845     int n2;
846     int ic;
847     int isgn;
848     double **a;
849     double *t;
850     int *ip;
851     double *w;
852 };
853 typedef struct fft2d_arg_st fft2d_arg_t;
854 
855 
xdft2d0_subth(int n1,int n2,int icr,int isgn,double ** a,int * ip,double * w)856 void xdft2d0_subth(int n1, int n2, int icr, int isgn, double **a,
857     int *ip, double *w)
858 {
859     void *xdft2d0_th(void *p);
860     fft2d_thread_t th[FFT2D_MAX_THREADS];
861     fft2d_arg_t ag[FFT2D_MAX_THREADS];
862     int nthread, i;
863 
864     nthread = FFT2D_MAX_THREADS;
865     if (nthread > n1) {
866         nthread = n1;
867     }
868     for (i = 0; i < nthread; i++) {
869         ag[i].nthread = nthread;
870         ag[i].n0 = i;
871         ag[i].n1 = n1;
872         ag[i].n2 = n2;
873         ag[i].ic = icr;
874         ag[i].isgn = isgn;
875         ag[i].a = a;
876         ag[i].ip = ip;
877         ag[i].w = w;
878         fft2d_thread_create(&th[i], xdft2d0_th, &ag[i]);
879     }
880     for (i = 0; i < nthread; i++) {
881         fft2d_thread_wait(th[i]);
882     }
883 }
884 
885 
cdft2d_subth(int n1,int n2,int isgn,double ** a,double * t,int * ip,double * w)886 void cdft2d_subth(int n1, int n2, int isgn, double **a, double *t,
887     int *ip, double *w)
888 {
889     void *cdft2d_th(void *p);
890     fft2d_thread_t th[FFT2D_MAX_THREADS];
891     fft2d_arg_t ag[FFT2D_MAX_THREADS];
892     int nthread, nt, i;
893 
894     nthread = FFT2D_MAX_THREADS;
895     nt = 8 * n1;
896     if (n2 == 4 * FFT2D_MAX_THREADS) {
897         nt >>= 1;
898     } else if (n2 < 4 * FFT2D_MAX_THREADS) {
899         nthread = n2 >> 1;
900         nt >>= 2;
901     }
902     for (i = 0; i < nthread; i++) {
903         ag[i].nthread = nthread;
904         ag[i].n0 = i;
905         ag[i].n1 = n1;
906         ag[i].n2 = n2;
907         ag[i].isgn = isgn;
908         ag[i].a = a;
909         ag[i].t = &t[nt * i];
910         ag[i].ip = ip;
911         ag[i].w = w;
912         fft2d_thread_create(&th[i], cdft2d_th, &ag[i]);
913     }
914     for (i = 0; i < nthread; i++) {
915         fft2d_thread_wait(th[i]);
916     }
917 }
918 
919 
ddxt2d0_subth(int n1,int n2,int ics,int isgn,double ** a,int * ip,double * w)920 void ddxt2d0_subth(int n1, int n2, int ics, int isgn, double **a,
921     int *ip, double *w)
922 {
923     void *ddxt2d0_th(void *p);
924     fft2d_thread_t th[FFT2D_MAX_THREADS];
925     fft2d_arg_t ag[FFT2D_MAX_THREADS];
926     int nthread, i;
927 
928     nthread = FFT2D_MAX_THREADS;
929     if (nthread > n1) {
930         nthread = n1;
931     }
932     for (i = 0; i < nthread; i++) {
933         ag[i].nthread = nthread;
934         ag[i].n0 = i;
935         ag[i].n1 = n1;
936         ag[i].n2 = n2;
937         ag[i].ic = ics;
938         ag[i].isgn = isgn;
939         ag[i].a = a;
940         ag[i].ip = ip;
941         ag[i].w = w;
942         fft2d_thread_create(&th[i], ddxt2d0_th, &ag[i]);
943     }
944     for (i = 0; i < nthread; i++) {
945         fft2d_thread_wait(th[i]);
946     }
947 }
948 
949 
ddxt2d_subth(int n1,int n2,int ics,int isgn,double ** a,double * t,int * ip,double * w)950 void ddxt2d_subth(int n1, int n2, int ics, int isgn, double **a,
951     double *t, int *ip, double *w)
952 {
953     void *ddxt2d_th(void *p);
954     fft2d_thread_t th[FFT2D_MAX_THREADS];
955     fft2d_arg_t ag[FFT2D_MAX_THREADS];
956     int nthread, nt, i;
957 
958     nthread = FFT2D_MAX_THREADS;
959     nt = 4 * n1;
960     if (n2 == 2 * FFT2D_MAX_THREADS) {
961         nt >>= 1;
962     } else if (n2 < 2 * FFT2D_MAX_THREADS) {
963         nthread = n2;
964         nt >>= 2;
965     }
966     for (i = 0; i < nthread; i++) {
967         ag[i].nthread = nthread;
968         ag[i].n0 = i;
969         ag[i].n1 = n1;
970         ag[i].n2 = n2;
971         ag[i].ic = ics;
972         ag[i].isgn = isgn;
973         ag[i].a = a;
974         ag[i].t = &t[nt * i];
975         ag[i].ip = ip;
976         ag[i].w = w;
977         fft2d_thread_create(&th[i], ddxt2d_th, &ag[i]);
978     }
979     for (i = 0; i < nthread; i++) {
980         fft2d_thread_wait(th[i]);
981     }
982 }
983 
984 
xdft2d0_th(void * p)985 void *xdft2d0_th(void *p)
986 {
987     void cdft(int n, int isgn, double *a, int *ip, double *w);
988     void rdft(int n, int isgn, double *a, int *ip, double *w);
989     int nthread, n0, n1, n2, icr, isgn, *ip, i;
990     double **a, *w;
991 
992     nthread = ((fft2d_arg_t *) p)->nthread;
993     n0 = ((fft2d_arg_t *) p)->n0;
994     n1 = ((fft2d_arg_t *) p)->n1;
995     n2 = ((fft2d_arg_t *) p)->n2;
996     icr = ((fft2d_arg_t *) p)->ic;
997     isgn = ((fft2d_arg_t *) p)->isgn;
998     a = ((fft2d_arg_t *) p)->a;
999     ip = ((fft2d_arg_t *) p)->ip;
1000     w = ((fft2d_arg_t *) p)->w;
1001     if (icr == 0) {
1002         for (i = n0; i < n1; i += nthread) {
1003             cdft(n2, isgn, a[i], ip, w);
1004         }
1005     } else {
1006         for (i = n0; i < n1; i += nthread) {
1007             rdft(n2, isgn, a[i], ip, w);
1008         }
1009     }
1010     return (void *) 0;
1011 }
1012 
1013 
cdft2d_th(void * p)1014 void *cdft2d_th(void *p)
1015 {
1016     void cdft(int n, int isgn, double *a, int *ip, double *w);
1017     int nthread, n0, n1, n2, isgn, *ip, i, j;
1018     double **a, *t, *w;
1019 
1020     nthread = ((fft2d_arg_t *) p)->nthread;
1021     n0 = ((fft2d_arg_t *) p)->n0;
1022     n1 = ((fft2d_arg_t *) p)->n1;
1023     n2 = ((fft2d_arg_t *) p)->n2;
1024     isgn = ((fft2d_arg_t *) p)->isgn;
1025     a = ((fft2d_arg_t *) p)->a;
1026     t = ((fft2d_arg_t *) p)->t;
1027     ip = ((fft2d_arg_t *) p)->ip;
1028     w = ((fft2d_arg_t *) p)->w;
1029     if (n2 > 4 * nthread) {
1030         for (j = 8 * n0; j < n2; j += 8 * nthread) {
1031             for (i = 0; i < n1; i++) {
1032                 t[2 * i] = a[i][j];
1033                 t[2 * i + 1] = a[i][j + 1];
1034                 t[2 * n1 + 2 * i] = a[i][j + 2];
1035                 t[2 * n1 + 2 * i + 1] = a[i][j + 3];
1036                 t[4 * n1 + 2 * i] = a[i][j + 4];
1037                 t[4 * n1 + 2 * i + 1] = a[i][j + 5];
1038                 t[6 * n1 + 2 * i] = a[i][j + 6];
1039                 t[6 * n1 + 2 * i + 1] = a[i][j + 7];
1040             }
1041             cdft(2 * n1, isgn, t, ip, w);
1042             cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1043             cdft(2 * n1, isgn, &t[4 * n1], ip, w);
1044             cdft(2 * n1, isgn, &t[6 * n1], ip, w);
1045             for (i = 0; i < n1; i++) {
1046                 a[i][j] = t[2 * i];
1047                 a[i][j + 1] = t[2 * i + 1];
1048                 a[i][j + 2] = t[2 * n1 + 2 * i];
1049                 a[i][j + 3] = t[2 * n1 + 2 * i + 1];
1050                 a[i][j + 4] = t[4 * n1 + 2 * i];
1051                 a[i][j + 5] = t[4 * n1 + 2 * i + 1];
1052                 a[i][j + 6] = t[6 * n1 + 2 * i];
1053                 a[i][j + 7] = t[6 * n1 + 2 * i + 1];
1054             }
1055         }
1056     } else if (n2 == 4 * nthread) {
1057         for (i = 0; i < n1; i++) {
1058             t[2 * i] = a[i][4 * n0];
1059             t[2 * i + 1] = a[i][4 * n0 + 1];
1060             t[2 * n1 + 2 * i] = a[i][4 * n0 + 2];
1061             t[2 * n1 + 2 * i + 1] = a[i][4 * n0 + 3];
1062         }
1063         cdft(2 * n1, isgn, t, ip, w);
1064         cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1065         for (i = 0; i < n1; i++) {
1066             a[i][4 * n0] = t[2 * i];
1067             a[i][4 * n0 + 1] = t[2 * i + 1];
1068             a[i][4 * n0 + 2] = t[2 * n1 + 2 * i];
1069             a[i][4 * n0 + 3] = t[2 * n1 + 2 * i + 1];
1070         }
1071     } else if (n2 == 2 * nthread) {
1072         for (i = 0; i < n1; i++) {
1073             t[2 * i] = a[i][2 * n0];
1074             t[2 * i + 1] = a[i][2 * n0 + 1];
1075         }
1076         cdft(2 * n1, isgn, t, ip, w);
1077         for (i = 0; i < n1; i++) {
1078             a[i][2 * n0] = t[2 * i];
1079             a[i][2 * n0 + 1] = t[2 * i + 1];
1080         }
1081     }
1082     return (void *) 0;
1083 }
1084 
1085 
ddxt2d0_th(void * p)1086 void *ddxt2d0_th(void *p)
1087 {
1088     void ddct(int n, int isgn, double *a, int *ip, double *w);
1089     void ddst(int n, int isgn, double *a, int *ip, double *w);
1090     int nthread, n0, n1, n2, ics, isgn, *ip, i;
1091     double **a, *w;
1092 
1093     nthread = ((fft2d_arg_t *) p)->nthread;
1094     n0 = ((fft2d_arg_t *) p)->n0;
1095     n1 = ((fft2d_arg_t *) p)->n1;
1096     n2 = ((fft2d_arg_t *) p)->n2;
1097     ics = ((fft2d_arg_t *) p)->ic;
1098     isgn = ((fft2d_arg_t *) p)->isgn;
1099     a = ((fft2d_arg_t *) p)->a;
1100     ip = ((fft2d_arg_t *) p)->ip;
1101     w = ((fft2d_arg_t *) p)->w;
1102     if (ics == 0) {
1103         for (i = n0; i < n1; i += nthread) {
1104             ddct(n2, isgn, a[i], ip, w);
1105         }
1106     } else {
1107         for (i = n0; i < n1; i += nthread) {
1108             ddst(n2, isgn, a[i], ip, w);
1109         }
1110     }
1111     return (void *) 0;
1112 }
1113 
1114 
ddxt2d_th(void * p)1115 void *ddxt2d_th(void *p)
1116 {
1117     void ddct(int n, int isgn, double *a, int *ip, double *w);
1118     void ddst(int n, int isgn, double *a, int *ip, double *w);
1119     int nthread, n0, n1, n2, ics, isgn, *ip, i, j;
1120     double **a, *t, *w;
1121 
1122     nthread = ((fft2d_arg_t *) p)->nthread;
1123     n0 = ((fft2d_arg_t *) p)->n0;
1124     n1 = ((fft2d_arg_t *) p)->n1;
1125     n2 = ((fft2d_arg_t *) p)->n2;
1126     ics = ((fft2d_arg_t *) p)->ic;
1127     isgn = ((fft2d_arg_t *) p)->isgn;
1128     a = ((fft2d_arg_t *) p)->a;
1129     t = ((fft2d_arg_t *) p)->t;
1130     ip = ((fft2d_arg_t *) p)->ip;
1131     w = ((fft2d_arg_t *) p)->w;
1132     if (n2 > 2 * nthread) {
1133         for (j = 4 * n0; j < n2; j += 4 * nthread) {
1134             for (i = 0; i < n1; i++) {
1135                 t[i] = a[i][j];
1136                 t[n1 + i] = a[i][j + 1];
1137                 t[2 * n1 + i] = a[i][j + 2];
1138                 t[3 * n1 + i] = a[i][j + 3];
1139             }
1140             if (ics == 0) {
1141                 ddct(n1, isgn, t, ip, w);
1142                 ddct(n1, isgn, &t[n1], ip, w);
1143                 ddct(n1, isgn, &t[2 * n1], ip, w);
1144                 ddct(n1, isgn, &t[3 * n1], ip, w);
1145             } else {
1146                 ddst(n1, isgn, t, ip, w);
1147                 ddst(n1, isgn, &t[n1], ip, w);
1148                 ddst(n1, isgn, &t[2 * n1], ip, w);
1149                 ddst(n1, isgn, &t[3 * n1], ip, w);
1150             }
1151             for (i = 0; i < n1; i++) {
1152                 a[i][j] = t[i];
1153                 a[i][j + 1] = t[n1 + i];
1154                 a[i][j + 2] = t[2 * n1 + i];
1155                 a[i][j + 3] = t[3 * n1 + i];
1156             }
1157         }
1158     } else if (n2 == 2 * nthread) {
1159         for (i = 0; i < n1; i++) {
1160             t[i] = a[i][2 * n0];
1161             t[n1 + i] = a[i][2 * n0 + 1];
1162         }
1163         if (ics == 0) {
1164             ddct(n1, isgn, t, ip, w);
1165             ddct(n1, isgn, &t[n1], ip, w);
1166         } else {
1167             ddst(n1, isgn, t, ip, w);
1168             ddst(n1, isgn, &t[n1], ip, w);
1169         }
1170         for (i = 0; i < n1; i++) {
1171             a[i][2 * n0] = t[i];
1172             a[i][2 * n0 + 1] = t[n1 + i];
1173         }
1174     } else if (n2 == nthread) {
1175         for (i = 0; i < n1; i++) {
1176             t[i] = a[i][n0];
1177         }
1178         if (ics == 0) {
1179             ddct(n1, isgn, t, ip, w);
1180         } else {
1181             ddst(n1, isgn, t, ip, w);
1182         }
1183         for (i = 0; i < n1; i++) {
1184             a[i][n0] = t[i];
1185         }
1186     }
1187     return (void *) 0;
1188 }
1189 #endif /* USE_FFT2D_THREADS */
1190 
1191