• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (c) 2013 RISC OS Open Ltd
 * Author: Ben Avison <bavison@riscosopen.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22#include "libavutil/arm/asm.S"
23
@ The fftx_internal_vfp versions of the functions obey a modified AAPCS:
@ VFP is in RunFast mode, vector length 4, stride 1 throughout, and
@ all single-precision VFP registers may be corrupted on exit. The a2
@ register may not be clobbered in these functions, as it holds the
@ stored original FPSCR.
29
@ void ff_fft_calc_vfp(FFTContext *s, FFTComplex *z)
@ Public entry point: dispatches to the transform routine for s->nbits.
@ The jump table starts at nbits == 2 (fft4), hence the -8 byte bias
@ ((nbits - 2) * 4 == nbits * 4 - 8).
function ff_fft_calc_vfp, export=1
        ldr     ip, [a1, #0]    @ ip = s->nbits (first word of the context)
        mov     a1, a2          @ the fft*_vfp routines take the data pointer in a1
        movrel  a2, (fft_tab_vfp - 8)   @ bias so entry 0 corresponds to nbits == 2
        ldr     pc, [a2, ip, lsl #2]    @ tail-call fft_tab_vfp[nbits - 2]
endfunc
@ Dispatch table indexed by (nbits - 2); entry k handles 2^(k+2) points.
const   fft_tab_vfp, relocate=1
        .word   fft4_vfp            @ nbits ==  2
        .word   fft8_vfp            @ nbits ==  3
        .word   X(ff_fft16_vfp)     @ this one alone is exported (nbits == 4)
        .word   fft32_vfp           @ nbits ==  5
        .word   fft64_vfp           @ nbits ==  6
        .word   fft128_vfp          @ nbits ==  7
        .word   fft256_vfp          @ nbits ==  8
        .word   fft512_vfp          @ nbits ==  9
        .word   fft1024_vfp         @ nbits == 10
        .word   fft2048_vfp         @ nbits == 11
        .word   fft4096_vfp         @ nbits == 12
        .word   fft8192_vfp         @ nbits == 13
        .word   fft16384_vfp        @ nbits == 14
        .word   fft32768_vfp        @ nbits == 15
        .word   fft65536_vfp        @ nbits == 16
endconst
53
@ In-place 4-point FFT of the interleaved complex floats at a1 (z[0..3]).
@ NOTE(review): unlike fft8 and larger sizes there is no FPSCR/RunFast
@ setup here — each butterfly is spelled out as individual scalar
@ vadd.f/vsub.f operations, so this assumes the caller's FPSCR is in
@ ordinary scalar mode. The "@ stall" comments mark VFP result-latency
@ gaps on the in-order pipeline this code was scheduled for.
function fft4_vfp
        vldr    d0, [a1, #0*2*4]   @ s0,s1   = z[0]
        vldr    d4, [a1, #1*2*4]   @ s8,s9   = z[1]
        vldr    d1, [a1, #2*2*4]   @ s2,s3   = z[2]
        vldr    d5, [a1, #3*2*4]   @ s10,s11 = z[3]
        @ stall
        vadd.f  s12, s0, s8        @ i0
        vadd.f  s13, s1, s9        @ i1
        vadd.f  s14, s2, s10       @ i2
        vadd.f  s15, s3, s11       @ i3
        vsub.f  s8, s0, s8         @ i4
        vsub.f  s9, s1, s9         @ i5
        vsub.f  s10, s2, s10       @ i6
        vsub.f  s11, s3, s11       @ i7
        @ stall
        @ stall
        vadd.f  s0, s12, s14       @ z[0].re
        vsub.f  s4, s12, s14       @ z[2].re
        vadd.f  s1, s13, s15       @ z[0].im
        vsub.f  s5, s13, s15       @ z[2].im
        vadd.f  s7, s9, s10        @ z[3].im
        vsub.f  s3, s9, s10        @ z[1].im
        vadd.f  s2, s8, s11        @ z[1].re
        vsub.f  s6, s8, s11        @ z[3].re
        @ stall
        @ stall
        vstr    d0, [a1, #0*2*4]   @ z[0] = s0,s1
        vstr    d2, [a1, #2*2*4]   @ z[2] = s4,s5
        @ stall
        @ stall
        vstr    d1, [a1, #1*2*4]   @ z[1] = s2,s3
        vstr    d3, [a1, #3*2*4]   @ z[3] = s6,s7

        bx      lr
endfunc
89
@ First stage of an 8-point FFT on z[0..7] at a1. Runs under the modified
@ AAPCS described at the top of the file: RunFast mode with vector length
@ 4, so the operations marked "vector op" each act on 4 consecutive
@ s-registers. Deeper indentation marks instructions belonging to a
@ different, interleaved dependency chain (software pipelining for the
@ in-order VFP). All results are stored back to z[] except z[1] and z[3],
@ which are left in s16-s19 (d8,d9) for macro_fft8_tail to store —
@ .Lfft16_internal_vfp exploits that window to overlap more work.
.macro macro_fft8_head
        @ FFT4
        vldr    d4, [a1, #0 * 2*4]
        vldr    d6, [a1, #1 * 2*4]
        vldr    d5, [a1, #2 * 2*4]
        vldr    d7, [a1, #3 * 2*4]
            @ BF
            vldr    d12, [a1, #4 * 2*4]
        vadd.f  s16, s8, s12    @ vector op
            vldr    d14, [a1, #5 * 2*4]
            vldr    d13, [a1, #6 * 2*4]
            vldr    d15, [a1, #7 * 2*4]
        vsub.f  s20, s8, s12    @ vector op
        vadd.f  s0, s16, s18
        vsub.f  s2, s16, s18
        vadd.f  s1, s17, s19
        vsub.f  s3, s17, s19
        vadd.f  s7, s21, s22
        vsub.f  s5, s21, s22
        vadd.f  s4, s20, s23
        vsub.f  s6, s20, s23
            vsub.f  s20, s24, s28   @ vector op
        vstr    d0, [a1, #0 * 2*4]  @ transfer s0-s7 to s24-s31 via memory
        vstr    d1, [a1, #1 * 2*4]
        vldr    s0, cos1pi4         @ scalar twiddle for the TRANSFORM below
            vadd.f  s16, s24, s28   @ vector op
        vstr    d2, [a1, #2 * 2*4]
        vstr    d3, [a1, #3 * 2*4]
        vldr    d12, [a1, #0 * 2*4]
            @ TRANSFORM
            vmul.f  s20, s20, s0    @ vector x scalar op
        vldr    d13, [a1, #1 * 2*4]
        vldr    d14, [a1, #2 * 2*4]
        vldr    d15, [a1, #3 * 2*4]
        @ BUTTERFLIES
        vadd.f  s0, s18, s16
        vadd.f  s1, s17, s19
        vsub.f  s2, s17, s19
        vsub.f  s3, s18, s16
            vadd.f  s4, s21, s20
            vsub.f  s5, s21, s20
            vadd.f  s6, s22, s23
            vsub.f  s7, s22, s23
        vadd.f  s8, s0, s24         @ vector op
        vstr    d0, [a1, #0 * 2*4]  @ transfer s0-s3 to s12-s15 via memory
        vstr    d1, [a1, #1 * 2*4]
        vldr    d6, [a1, #0 * 2*4]
        vldr    d7, [a1, #1 * 2*4]
            vadd.f  s1, s5, s6
            vadd.f  s0, s7, s4
            vsub.f  s2, s5, s6
            vsub.f  s3, s7, s4
        vsub.f  s12, s24, s12       @ vector op
            vsub.f  s5, s29, s1
            vsub.f  s4, s28, s0
            vsub.f  s6, s30, s2
            vsub.f  s7, s31, s3
            vadd.f  s16, s0, s28    @ vector op
        vstr    d6, [a1, #4 * 2*4]
        vstr    d7, [a1, #6 * 2*4]
        vstr    d4, [a1, #0 * 2*4]
        vstr    d5, [a1, #2 * 2*4]
             vstr    d2, [a1, #5 * 2*4]
             vstr    d3, [a1, #7 * 2*4]
.endm
155
@ Completes an 8-point FFT begun by macro_fft8_head: stores the z[1] and
@ z[3] results that the head left in s16-s19 (d8,d9).
.macro macro_fft8_tail
             vstr    d8, [a1, #1 * 2*4]
             vstr    d9, [a1, #3 * 2*4]
.endm
160
@ 8-point FFT core. Obeys the modified AAPCS from the file header:
@ RunFast vector mode must already be enabled and a2 (the caller's saved
@ FPSCR) must not be clobbered.
function .Lfft8_internal_vfp
        macro_fft8_head
        macro_fft8_tail
        bx      lr
endfunc
166
@ Standard-ABI wrapper: 8-point FFT on the buffer at a1.
@ Enables RunFast mode with vector length 4 / stride 1, preserves the
@ callee-saved s16-s31, then restores the caller's FPSCR before return.
@ a2 carries the saved FPSCR across the internal call (which promises
@ not to clobber it); lr is parked in ip because bl overwrites lr.
function fft8_vfp
        ldr     a3, =0x03030000     @ RunFast mode, vector length 4, stride 1
        fmrx    a2, FPSCR           @ a2 = original FPSCR, live across the call
        fmxr    FPSCR, a3
        vpush   {s16-s31}
        mov     ip, lr
        bl      .Lfft8_internal_vfp
        vpop    {s16-s31}
        fmxr    FPSCR, a2           @ restore caller's FP state
        bx      ip
endfunc
178
@ Twiddle-factor literals, kept in .text so the pc-relative vldr loads
@ above and below can reach them.
.align 3
cos1pi4:    @ cos(1*pi/4) = sqrt(2)/2
        .float  0.707106769084930419921875
cos1pi8:    @ cos(1*pi/8) = sqrt(2+sqrt(2))/2
        .float  0.92387950420379638671875
cos3pi8:    @ cos(3*pi/8) = sqrt(2-sqrt(2))/2
        .float  0.3826834261417388916015625
186
@ 16-point FFT core (modified AAPCS: RunFast vector mode enabled, a2 =
@ saved FPSCR, all s-registers may be clobbered). Structure: an 8-point
@ transform of z[0..7] via macro_fft8_head/tail, two 4-point transforms
@ of z[8..11] and z[12..15], then the combining TRANSFORM/TRANSFORM_ZERO
@ butterflies using the cos1pi4/cos1pi8/cos3pi8 literals above. The
@ varying indentation depths mark independent dependency chains that are
@ interleaved to hide VFP latencies; statement order is load-bearing.
function .Lfft16_internal_vfp
        macro_fft8_head
        @ FFT4(z+8)
        vldr    d10, [a1, #8 * 2*4]
        vldr    d12, [a1, #9 * 2*4]
        vldr    d11, [a1, #10 * 2*4]
        vldr    d13, [a1, #11 * 2*4]
        macro_fft8_tail
        vadd.f  s16, s20, s24   @ vector op
            @ FFT4(z+12)
            vldr    d4, [a1, #12 * 2*4]
            vldr    d6, [a1, #13 * 2*4]
            vldr    d5, [a1, #14 * 2*4]
        vsub.f  s20, s20, s24   @ vector op
            vldr    d7, [a1, #15 * 2*4]
        vadd.f  s0, s16, s18
        vsub.f  s4, s16, s18
        vadd.f  s1, s17, s19
        vsub.f  s5, s17, s19
        vadd.f  s7, s21, s22
        vsub.f  s3, s21, s22
        vadd.f  s2, s20, s23
        vsub.f  s6, s20, s23
            vadd.f  s16, s8, s12    @ vector op
        vstr    d0, [a1, #8 * 2*4]
        vstr    d2, [a1, #10 * 2*4]
        vstr    d1, [a1, #9 * 2*4]
            vsub.f  s20, s8, s12
        vstr    d3, [a1, #11 * 2*4]
        @ TRANSFORM(z[2],z[6],z[10],z[14],cos1pi4,cos1pi4)
        vldr    d12, [a1, #10 * 2*4]
            vadd.f  s0, s16, s18
            vadd.f  s1, s17, s19
            vsub.f  s6, s16, s18
            vsub.f  s7, s17, s19
            vsub.f  s3, s21, s22
            vadd.f  s2, s20, s23
            vadd.f  s5, s21, s22
            vsub.f  s4, s20, s23
            vstr    d0, [a1, #12 * 2*4]
        vmov    s0, s6
          @ TRANSFORM(z[1],z[5],z[9],z[13],cos1pi8,cos3pi8)
          vldr    d6, [a1, #9 * 2*4]
            vstr    d1, [a1, #13 * 2*4]
        vldr    d1, cos1pi4 @ s2 = cos1pi4, s3 = cos1pi8
            vstr    d2, [a1, #15 * 2*4]
          vldr    d7, [a1, #13 * 2*4]
        vadd.f  s4, s25, s24
        vsub.f  s5, s25, s24
        vsub.f  s6, s0, s7
        vadd.f  s7, s0, s7
          vmul.f  s20, s12, s3  @ vector op
            @ TRANSFORM(z[3],z[7],z[11],z[15],cos3pi8,cos1pi8)
            vldr    d4, [a1, #11 * 2*4]
            vldr    d5, [a1, #15 * 2*4]
            vldr    s1, cos3pi8
        vmul.f  s24, s4, s2     @ vector * scalar op
          vmul.f  s28, s12, s1  @ vector * scalar op
            vmul.f  s12, s8, s1 @ vector * scalar op
          vadd.f  s4, s20, s29
          vsub.f  s5, s21, s28
          vsub.f  s6, s22, s31
          vadd.f  s7, s23, s30
            vmul.f  s8, s8, s3  @ vector * scalar op
          vldr    d8, [a1, #1 * 2*4]
          vldr    d9, [a1, #5 * 2*4]
            vldr    d10, [a1, #3 * 2*4]
            vldr    d11, [a1, #7 * 2*4]
        vldr    d14, [a1, #2 * 2*4]
          vadd.f  s0, s6, s4
          vadd.f  s1, s5, s7
          vsub.f  s2, s5, s7
          vsub.f  s3, s6, s4
            vadd.f  s4, s12, s9
            vsub.f  s5, s13, s8
            vsub.f  s6, s14, s11
            vadd.f  s7, s15, s10
          vadd.f  s12, s0, s16  @ vector op
          vstr    d0, [a1, #1 * 2*4]
          vstr    d1, [a1, #5 * 2*4]
          vldr    d4, [a1, #1 * 2*4]
          vldr    d5, [a1, #5 * 2*4]
            vadd.f  s0, s6, s4
            vadd.f  s1, s5, s7
            vsub.f  s2, s5, s7
            vsub.f  s3, s6, s4
          vsub.f  s8, s16, s8   @ vector op
          vstr    d6, [a1, #1 * 2*4]
          vstr    d7, [a1, #5 * 2*4]
        vldr    d15, [a1, #6 * 2*4]
            vsub.f  s4, s20, s0
            vsub.f  s5, s21, s1
            vsub.f  s6, s22, s2
            vsub.f  s7, s23, s3
            vadd.f  s20, s0, s20    @ vector op
          vstr    d4, [a1, #9 * 2*4]
              @ TRANSFORM_ZERO(z[0],z[4],z[8],z[12])
              vldr    d6, [a1, #8 * 2*4]
          vstr    d5, [a1, #13 * 2*4]
              vldr    d7, [a1, #12 * 2*4]
          vstr    d2, [a1, #11 * 2*4]
              vldr    d8, [a1, #0 * 2*4]
          vstr    d3, [a1, #15 * 2*4]
              vldr    d9, [a1, #4 * 2*4]
        vadd.f  s0, s26, s24
        vadd.f  s1, s25, s27
        vsub.f  s2, s25, s27
        vsub.f  s3, s26, s24
              vadd.f  s4, s14, s12
              vadd.f  s5, s13, s15
              vsub.f  s6, s13, s15
              vsub.f  s7, s14, s12
        vadd.f  s8, s0, s28 @ vector op
        vstr    d0, [a1, #3 * 2*4]
        vstr    d1, [a1, #7 * 2*4]
        vldr    d6, [a1, #3 * 2*4]
        vldr    d7, [a1, #7 * 2*4]
              vsub.f  s0, s16, s4
              vsub.f  s1, s17, s5
              vsub.f  s2, s18, s6
              vsub.f  s3, s19, s7
        vsub.f  s12, s28, s12       @ vector op
              vadd.f  s16, s4, s16  @ vector op
            vstr    d10, [a1, #3 * 2*4]
            vstr    d11, [a1, #7 * 2*4]
        vstr    d4, [a1, #2 * 2*4]
        vstr    d5, [a1, #6 * 2*4]
              vstr    d0, [a1, #8 * 2*4]
              vstr    d1, [a1, #12 * 2*4]
        vstr    d6, [a1, #10 * 2*4]
        vstr    d7, [a1, #14 * 2*4]
              vstr    d8, [a1, #0 * 2*4]
              vstr    d9, [a1, #4 * 2*4]

        bx      lr
endfunc
323
@ void ff_fft16_vfp(FFTComplex *z) — exported 16-point entry point
@ (referenced directly from fft_tab_vfp). Same wrapper pattern as
@ fft8_vfp: enable RunFast vector mode, preserve callee-saved s16-s31,
@ keep the original FPSCR in a2 across the internal call, restore it,
@ and return via ip since bl clobbers lr.
function ff_fft16_vfp, export=1
        ldr     a3, =0x03030000     @ RunFast mode, vector length 4, stride 1
        fmrx    a2, FPSCR           @ a2 = original FPSCR, live across the call
        fmxr    FPSCR, a3
        vpush   {s16-s31}
        mov     ip, lr
        bl      .Lfft16_internal_vfp
        vpop    {s16-s31}
        fmxr    FPSCR, a2           @ restore caller's FP state
        bx      ip
endfunc
335
@ pass n, z0, z1, z2, z3
@ Combining pass of the split-radix FFT. z0..z3 are the base pointers of
@ the four operand groups (they may alias — see def_fft, which also .sets
@ the assembler symbols o1/o2/o3 giving the element offsets used here).
@ Twiddles come from the ff_cos_* table: v5 walks forwards (wre, vldmia)
@ and v6 — initialised below to v5 + 4*2*n — walks backwards (wim,
@ vldmdb). Each iteration handles 2 complex elements per pointer; the
@ loop body runs n-1 times and the final iteration is peeled after the
@ loop, identical but without the pointer increments. As elsewhere,
@ indentation depth marks interleaved dependency chains.
.macro pass n, z0, z1, z2, z3
        add     v6, v5, #4*2*\n         @ v6 = one-past-the-end of the twiddle table
        @ TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3])
            @ TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1])
                @ TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0])
                    @ TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1])
            vldr    d8, [\z2, #8*(o2+1)]        @ s16,s17
            vldmdb  v6!, {s2}
            vldr    d9, [\z3, #8*(o3+1)]        @ s18,s19
            vldmia  v5!, {s0,s1}                @ s0 is unused
        vldr    s7, [\z2, #8*o2]            @ t1
            vmul.f  s20, s16, s2                @ vector * scalar
        vldr    s0, [\z3, #8*o3]            @ t5
        vldr    s6, [\z2, #8*o2+4]          @ t2
        vldr    s3, [\z3, #8*o3+4]          @ t6
            vmul.f  s16, s16, s1                @ vector * scalar
        ldr     a4, =\n-1                   @ loop count (last group peeled below)
1:      add     \z0, \z0, #8*2
 .if \n*4*2 >= 512
        add     \z1, \z1, #8*2
 .endif
 .if \n*4*2 >= 256
        add     \z2, \z2, #8*2
 .endif
 .if \n*4*2 >= 512
        add     \z3, \z3, #8*2
 .endif
        @ up to 2 stalls (VFP vector issuing / waiting for s0)
        @ depending upon whether this is the first iteration and
        @ how many add instructions are inserted above
        vadd.f  s4, s0, s7                  @ t5
        vadd.f  s5, s6, s3                  @ t6
        vsub.f  s6, s6, s3                  @ t4
        vsub.f  s7, s0, s7                  @ t3
        vldr    d6, [\z0, #8*0-8*2]         @ s12,s13
            vadd.f  s0, s16, s21                @ t1
        vldr    d7, [\z1, #8*o1-8*2]        @ s14,s15
            vsub.f  s1, s18, s23                @ t5
        vadd.f  s8, s4, s12                 @ vector + vector
        @ stall (VFP vector issuing)
        @ stall (VFP vector issuing)
        @ stall (VFP vector issuing)
        vsub.f  s4, s12, s4
        vsub.f  s5, s13, s5
        vsub.f  s6, s14, s6
        vsub.f  s7, s15, s7
            vsub.f  s2, s17, s20                @ t2
            vadd.f  s3, s19, s22                @ t6
        vstr    d4, [\z0, #8*0-8*2]         @ s8,s9
        vstr    d5, [\z1, #8*o1-8*2]        @ s10,s11
        @ stall (waiting for s5)
        vstr    d2, [\z2, #8*o2-8*2]        @ s4,s5
            vadd.f  s4, s1, s0                  @ t5
        vstr    d3, [\z3, #8*o3-8*2]        @ s6,s7
            vsub.f  s7, s1, s0                  @ t3
            vadd.f  s5, s2, s3                  @ t6
            vsub.f  s6, s2, s3                  @ t4
            vldr    d6, [\z0, #8*1-8*2]         @ s12,s13
            vldr    d7, [\z1, #8*(o1+1)-8*2]    @ s14,s15
                vldr    d4, [\z2, #8*o2]            @ s8,s9
                vldmdb  v6!, {s2,s3}
                vldr    d5, [\z3, #8*o3]            @ s10,s11
            vadd.f  s20, s4, s12                @ vector + vector
                vldmia  v5!, {s0,s1}
                    vldr    d8, [\z2, #8*(o2+1)]        @ s16,s17
            @ stall (VFP vector issuing)
            vsub.f  s4, s12, s4
            vsub.f  s5, s13, s5
            vsub.f  s6, s14, s6
            vsub.f  s7, s15, s7
                vmul.f  s12, s8, s3                 @ vector * scalar
            vstr    d10, [\z0, #8*1-8*2]        @ s20,s21
                    vldr    d9, [\z3, #8*(o3+1)]        @ s18,s19
            vstr    d11, [\z1, #8*(o1+1)-8*2]   @ s22,s23
                vmul.f  s8, s8, s0                  @ vector * scalar
            vstr    d2, [\z2, #8*(o2+1)-8*2]    @ s4,s5
            @ stall (waiting for s7)
            vstr    d3, [\z3, #8*(o3+1)-8*2]    @ s6,s7
                    vmul.f  s20, s16, s2                @ vector * scalar
                @ stall (VFP vector issuing)
                @ stall (VFP vector issuing)
                @ stall (VFP vector issuing)
                vadd.f  s7, s8, s13                 @ t1
                vsub.f  s6, s9, s12                 @ t2
                vsub.f  s0, s10, s15                @ t5
                vadd.f  s3, s11, s14                @ t6
                    vmul.f  s16, s16, s1                @ vector * scalar
        subs    a4, a4, #1
        bne     1b
        @ What remains is identical to the first two indentations of
        @ the above, but without the increment of z
        vadd.f  s4, s0, s7                  @ t5
        vadd.f  s5, s6, s3                  @ t6
        vsub.f  s6, s6, s3                  @ t4
        vsub.f  s7, s0, s7                  @ t3
        vldr    d6, [\z0, #8*0]             @ s12,s13
            vadd.f  s0, s16, s21                @ t1
        vldr    d7, [\z1, #8*o1]            @ s14,s15
            vsub.f  s1, s18, s23                @ t5
        vadd.f  s8, s4, s12                 @ vector + vector
        vsub.f  s4, s12, s4
        vsub.f  s5, s13, s5
        vsub.f  s6, s14, s6
        vsub.f  s7, s15, s7
            vsub.f  s2, s17, s20                @ t2
            vadd.f  s3, s19, s22                @ t6
        vstr    d4, [\z0, #8*0]             @ s8,s9
        vstr    d5, [\z1, #8*o1]            @ s10,s11
        vstr    d2, [\z2, #8*o2]            @ s4,s5
            vadd.f  s4, s1, s0                  @ t5
        vstr    d3, [\z3, #8*o3]            @ s6,s7
            vsub.f  s7, s1, s0                  @ t3
            vadd.f  s5, s2, s3                  @ t6
            vsub.f  s6, s2, s3                  @ t4
            vldr    d6, [\z0, #8*1]             @ s12,s13
            vldr    d7, [\z1, #8*(o1+1)]        @ s14,s15
            vadd.f  s20, s4, s12                @ vector + vector
            vsub.f  s4, s12, s4
            vsub.f  s5, s13, s5
            vsub.f  s6, s14, s6
            vsub.f  s7, s15, s7
            vstr    d10, [\z0, #8*1]            @ s20,s21
            vstr    d11, [\z1, #8*(o1+1)]       @ s22,s23
            vstr    d2, [\z2, #8*(o2+1)]        @ s4,s5
            vstr    d3, [\z3, #8*(o3+1)]        @ s6,s7
.endm
462
@ def_fft n, n2, n4
@ Emits two functions for size n (callers pass n2 == n/2, n4 == n/4):
@  * .Lfft<n>_internal_vfp — split-radix recursion: fft<n/2> on the
@    first half, fft<n/4> on each quarter of the second half, then one
@    combining "pass" using the ff_cos_<n> twiddle table;
@  * fft<n>_vfp — standard-ABI wrapper enabling RunFast vector mode,
@    same pattern as fft8_vfp/ff_fft16_vfp.
@ o1/o2/o3 are element offsets consumed inside "pass". NOTE(review):
@ larger sizes appear to need extra base registers (v2-v4) because the
@ byte offsets would otherwise exceed vldr/vstr immediate range —
@ confirm against the VFP addressing-mode limits.
.macro  def_fft n, n2, n4
function .Lfft\n\()_internal_vfp
 .if \n >= 512
        push    {v1-v6,lr}
 .elseif \n >= 256
        push    {v1-v2,v5-v6,lr}
 .else
        push    {v1,v5-v6,lr}
 .endif
        mov     v1, a1                      @ v1 = z, preserved across the bl's
        bl      .Lfft\n2\()_internal_vfp    @ fft(n/2) on z[0 .. n/2-1]
        add     a1, v1, #8*(\n/4)*2         @ a1 = &z[n/2]
        bl      .Lfft\n4\()_internal_vfp    @ fft(n/4) on the third quarter
        movrelx v5, X(ff_cos_\n), a1        @ v5 = twiddle table for "pass"
        add     a1, v1, #8*(\n/4)*3         @ a1 = &z[3*n/4]
        bl      .Lfft\n4\()_internal_vfp    @ fft(n/4) on the fourth quarter
 .if \n >= 512
  .set o1, 0*(\n/4/2)                       @ four separate base pointers,
  .set o2, 0*(\n/4/2)                       @ so in-pointer offsets are 0
  .set o3, 0*(\n/4/2)
        add     v2, v1, #8*2*(\n/4/2)
        add     v3, v1, #8*4*(\n/4/2)
        add     v4, v1, #8*6*(\n/4/2)
        pass    (\n/4/2), v1, v2, v3, v4
        pop     {v1-v6,pc}
 .elseif \n >= 256
  .set o1, 2*(\n/4/2)                       @ two base pointers, each serving
  .set o2, 0*(\n/4/2)                       @ two groups via o1/o3
  .set o3, 2*(\n/4/2)
        add     v2, v1, #8*4*(\n/4/2)
        pass    (\n/4/2), v1, v1, v2, v2
        pop     {v1-v2,v5-v6,pc}
 .else
  .set o1, 2*(\n/4/2)                       @ one base pointer covers all
  .set o2, 4*(\n/4/2)                       @ four groups
  .set o3, 6*(\n/4/2)
        pass    (\n/4/2), v1, v1, v1, v1
        pop     {v1,v5-v6,pc}
 .endif
endfunc

function fft\n\()_vfp
        ldr     a3, =0x03030000 /* RunFast mode, vector length 4, stride 1 */
        fmrx    a2, FPSCR
        fmxr    FPSCR, a3
        vpush   {s16-s31}
        mov     ip, lr
        bl      .Lfft\n\()_internal_vfp
        vpop    {s16-s31}
        fmxr    FPSCR, a2
        bx      ip
endfunc

.ltorg          @ dump the literal pool (the ldr = above) while in range
.endm
518
@ Instantiate every power-of-two size from 32 up to 65536 (nbits 5..16);
@ sizes 4, 8 and 16 are hand-written above.
        def_fft    32,    16,     8
        def_fft    64,    32,    16
        def_fft   128,    64,    32
        def_fft   256,   128,    64
        def_fft   512,   256,   128
        def_fft  1024,   512,   256
        def_fft  2048,  1024,   512
        def_fft  4096,  2048,  1024
        def_fft  8192,  4096,  2048
        def_fft 16384,  8192,  4096
        def_fft 32768, 16384,  8192
        def_fft 65536, 32768, 16384
531