/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * fft.c
 *
 * Fast Fourier Transform
 *
 */


#include "fft.h"

/*
 * Output reordering table for the 240-point FFT in this file.
 *
 * The table is a permutation of 0..239. After the last butterfly stage the
 * transform copies its working data to scratch buffers and then writes
 * output[i] = scratch[kSortTabFft[i]] to bring the results into normal order.
 */
const int16_t kSortTabFft[240] = {
  0, 60, 120, 180, 20, 80, 140, 200, 40, 100, 160, 220,
  4, 64, 124, 184, 24, 84, 144, 204, 44, 104, 164, 224,
  8, 68, 128, 188, 28, 88, 148, 208, 48, 108, 168, 228,
  12, 72, 132, 192, 32, 92, 152, 212, 52, 112, 172, 232,
  16, 76, 136, 196, 36, 96, 156, 216, 56, 116, 176, 236,
  1, 61, 121, 181, 21, 81, 141, 201, 41, 101, 161, 221,
  5, 65, 125, 185, 25, 85, 145, 205, 45, 105, 165, 225,
  9, 69, 129, 189, 29, 89, 149, 209, 49, 109, 169, 229,
  13, 73, 133, 193, 33, 93, 153, 213, 53, 113, 173, 233,
  17, 77, 137, 197, 37, 97, 157, 217, 57, 117, 177, 237,
  2, 62, 122, 182, 22, 82, 142, 202, 42, 102, 162, 222,
  6, 66, 126, 186, 26, 86, 146, 206, 46, 106, 166, 226,
  10, 70, 130, 190, 30, 90, 150, 210, 50, 110, 170, 230,
  14, 74, 134, 194, 34, 94, 154, 214, 54, 114, 174, 234,
  18, 78, 138, 198, 38, 98, 158, 218, 58, 118, 178, 238,
  3, 63, 123, 183, 23, 83, 143, 203, 43, 103, 163, 223,
  7, 67, 127, 187, 27, 87, 147, 207, 47, 107, 167, 227,
  11, 71, 131, 191, 31, 91, 151, 211, 51, 111, 171, 231,
  15, 75, 135, 195, 35, 95, 155, 215, 55, 115, 175, 235,
  19, 79, 139, 199, 39, 99, 159, 219, 59, 119, 179, 239
};

/*
 * Cosine table in Q14 (16384 represents 1.0).
 *
 * Entry k holds cos(2*pi*k/240), i.e. one full period sampled in steps of
 * 1.5 degrees. The FFT in this file also derives its sine-related factors
 * from this table by reading it at a quarter-period offset (index + 60).
 */
const int16_t kCosTabFfftQ14[240] = {
  16384,  16378, 16362,   16333,  16294,  16244,  16182,  16110,  16026,  15931,  15826,  15709,
  15582,  15444, 15296,   15137,  14968,  14788,  14598,  14399,  14189,  13970,  13741,  13502,
  13255,  12998, 12733,   12458,  12176,  11885,  11585,  11278,  10963,  10641,  10311,   9974,
  9630,   9280,  8923,    8561,   8192,   7818,   7438,   7053,   6664,   6270,   5872,   5469,
  5063,   4653,  4240,    3825,   3406,   2986,   2563,   2139,   1713,   1285,    857,    429,
  0,   -429,  -857,   -1285,  -1713,  -2139,  -2563,  -2986,  -3406,  -3825,  -4240,  -4653,
  -5063,  -5469, -5872,   -6270,  -6664,  -7053,  -7438,  -7818,  -8192,  -8561,  -8923,  -9280,
  -9630,  -9974, -10311, -10641, -10963, -11278, -11585, -11885, -12176, -12458, -12733, -12998,
  -13255, -13502, -13741, -13970, -14189, -14399, -14598, -14788, -14968, -15137, -15296, -15444,
  -15582, -15709, -15826, -15931, -16026, -16110, -16182, -16244, -16294, -16333, -16362, -16378,
  -16384, -16378, -16362, -16333, -16294, -16244, -16182, -16110, -16026, -15931, -15826, -15709,
  -15582, -15444, -15296, -15137, -14968, -14788, -14598, -14399, -14189, -13970, -13741, -13502,
  -13255, -12998, -12733, -12458, -12176, -11885, -11585, -11278, -10963, -10641, -10311,  -9974,
  -9630,  -9280,  -8923,  -8561,  -8192,  -7818,  -7438,  -7053,  -6664,  -6270,  -5872,  -5469,
  -5063,  -4653,  -4240,  -3825,  -3406,  -2986,  -2563,  -2139,  -1713,  -1285,   -857,   -429,
  0,    429,    857,   1285,   1713,   2139,   2563,   2986,   3406,   3825,   4240,   4653,
  5063,   5469,   5872,   6270,   6664,   7053,   7438,   7818,   8192,   8561,   8923,   9280,
  9630,   9974,  10311,  10641,  10963,  11278,  11585,  11885,  12176,  12458,  12733,  12998,
  13255,  13502,  13741,  13970,  14189,  14399,  14598,  14788,  14968,  15137,  15296,  15444,
  15582,  15709,  15826,  15931,  16026,  16110,  16182,  16244,  16294,  16333,  16362,  16378
};



70 /* Uses 16x16 mul, without rounding, which is faster. Uses WEBRTC_SPL_MUL_16_16_RSFT */
WebRtcIsacfix_FftRadix16Fastest(int16_t RexQx[],int16_t ImxQx[],int16_t iSign)71 int16_t WebRtcIsacfix_FftRadix16Fastest(int16_t RexQx[], int16_t ImxQx[], int16_t iSign) {
72 
73   int16_t dd, ee, ff, gg, hh, ii;
74   int16_t k0, k1, k2, k3, k4, kk;
75   int16_t tmp116, tmp216;
76 
77   int16_t ccc1Q14, ccc2Q14, ccc3Q14, sss1Q14, sss2Q14, sss3Q14;
78   int16_t sss60Q14, ccc72Q14, sss72Q14;
79   int16_t aaQx, ajQx, akQx, ajmQx, ajpQx, akmQx, akpQx;
80   int16_t bbQx, bjQx, bkQx, bjmQx, bjpQx, bkmQx, bkpQx;
81 
82   int16_t ReDATAQx[240],  ImDATAQx[240];
83 
84   sss60Q14 = kCosTabFfftQ14[20];
85   ccc72Q14 = kCosTabFfftQ14[48];
86   sss72Q14 = kCosTabFfftQ14[12];
87 
88   if (iSign < 0) {
89     sss72Q14 = -sss72Q14;
90     sss60Q14 = -sss60Q14;
91   }
92   /* Complexity is: 10 cycles */
93 
94   /* compute fourier transform */
95 
96   // transform for factor of 4
97   for (kk=0; kk<60; kk++) {
98     k0 = kk;
99     k1 = k0 + 60;
100     k2 = k1 + 60;
101     k3 = k2 + 60;
102 
103     akpQx = RexQx[k0] + RexQx[k2];
104     akmQx = RexQx[k0] - RexQx[k2];
105     ajpQx = RexQx[k1] + RexQx[k3];
106     ajmQx = RexQx[k1] - RexQx[k3];
107     bkpQx = ImxQx[k0] + ImxQx[k2];
108     bkmQx = ImxQx[k0] - ImxQx[k2];
109     bjpQx = ImxQx[k1] + ImxQx[k3];
110     bjmQx = ImxQx[k1] - ImxQx[k3];
111 
112     RexQx[k0] = akpQx + ajpQx;
113     ImxQx[k0] = bkpQx + bjpQx;
114     ajpQx = akpQx - ajpQx;
115     bjpQx = bkpQx - bjpQx;
116     if (iSign < 0) {
117       akpQx = akmQx + bjmQx;
118       bkpQx = bkmQx - ajmQx;
119       akmQx -= bjmQx;
120       bkmQx += ajmQx;
121     } else {
122       akpQx = akmQx - bjmQx;
123       bkpQx = bkmQx + ajmQx;
124       akmQx += bjmQx;
125       bkmQx -= ajmQx;
126     }
127 
128     ccc1Q14 = kCosTabFfftQ14[kk];
129     ccc2Q14 = kCosTabFfftQ14[2 * kk];
130     ccc3Q14 = kCosTabFfftQ14[3 * kk];
131     sss1Q14 = kCosTabFfftQ14[kk + 60];
132     sss2Q14 = kCosTabFfftQ14[2 * kk + 60];
133     sss3Q14 = kCosTabFfftQ14[3 * kk + 60];
134     if (iSign==1) {
135       sss1Q14 = -sss1Q14;
136       sss2Q14 = -sss2Q14;
137       sss3Q14 = -sss3Q14;
138     }
139 
140     //Do several multiplications like Q14*Q16>>14 = Q16
141     // RexQ16[k1] = akpQ16 * ccc1Q14 - bkpQ16 * sss1Q14;
142     // RexQ16[k2] = ajpQ16 * ccc2Q14 - bjpQ16 * sss2Q14;
143     // RexQ16[k3] = akmQ16 * ccc3Q14 - bkmQ16 * sss3Q14;
144     // ImxQ16[k1] = akpQ16 * sss1Q14 + bkpQ16 * ccc1Q14;
145     // ImxQ16[k2] = ajpQ16 * sss2Q14 + bjpQ16 * ccc2Q14;
146     // ImxQ16[k3] = akmQ16 * sss3Q14 + bkmQ16 * ccc3Q14;
147 
148     RexQx[k1] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc1Q14, akpQx, 14) -
149         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss1Q14, bkpQx, 14); // 6 non-mul + 2 mul cycles, i.e. 8 cycles (6+2*7=20 cycles if 16x32mul)
150     RexQx[k2] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, ajpQx, 14) -
151         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bjpQx, 14);
152     RexQx[k3] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc3Q14, akmQx, 14) -
153         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss3Q14, bkmQx, 14);
154     ImxQx[k1] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss1Q14, akpQx, 14) +
155         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc1Q14, bkpQx, 14);
156     ImxQx[k2] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, ajpQx, 14) +
157         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bjpQx, 14);
158     ImxQx[k3] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss3Q14, akmQx, 14) +
159         (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc3Q14, bkmQx, 14);
160     //This mul segment needs 6*8 = 48 cycles for 16x16 muls, but 6*20 = 120 cycles for 16x32 muls
161 
162 
163   }
164   /* Complexity is: 51+48 = 99 cycles for 16x16 muls, but 51+120 = 171 cycles for 16x32 muls*/
165 
166   // transform for factor of 3
167   kk=0;
168   k1=20;
169   k2=40;
170 
171   for (hh=0; hh<4; hh++) {
172     for (ii=0; ii<20; ii++) {
173       akQx = RexQx[kk];
174       bkQx = ImxQx[kk];
175       ajQx = RexQx[k1] + RexQx[k2];
176       bjQx = ImxQx[k1] + ImxQx[k2];
177       RexQx[kk] = akQx + ajQx;
178       ImxQx[kk] = bkQx + bjQx;
179       tmp116 = ajQx >> 1;
180       tmp216 = bjQx >> 1;
181       akQx = akQx - tmp116;
182       bkQx = bkQx - tmp216;
183       tmp116 = RexQx[k1] - RexQx[k2];
184       tmp216 = ImxQx[k1] - ImxQx[k2];
185 
186       ajQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss60Q14, tmp116, 14); // Q14*Qx>>14 = Qx
187       bjQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss60Q14, tmp216, 14); // Q14*Qx>>14 = Qx
188       RexQx[k1] = akQx - bjQx;
189       RexQx[k2] = akQx + bjQx;
190       ImxQx[k1] = bkQx + ajQx;
191       ImxQx[k2] = bkQx - ajQx;
192 
193       kk++;
194       k1++;
195       k2++;
196     }
197     /* Complexity : (31+6)*20 = 740 cycles for 16x16 muls, but (31+18)*20 = 980 cycles for 16x32 muls*/
198     kk=kk+40;
199     k1=k1+40;
200     k2=k2+40;
201   }
202   /* Complexity : 4*(740+3) = 2972 cycles for 16x16 muls, but 4*(980+3) = 3932 cycles for 16x32 muls*/
203 
204   /* multiply by rotation factor for odd factor 3 or 5 (not for 4)
205      Same code (duplicated) for both ii=2 and ii=3 */
206   kk = 1;
207   ee = 0;
208   ff = 0;
209 
210   for (gg=0; gg<19; gg++) {
211     kk += 20;
212     ff = ff+4;
213     for (hh=0; hh<2; hh++) {
214       ee = ff + hh * ff;
215       dd = ee + 60;
216       ccc2Q14 = kCosTabFfftQ14[ee];
217       sss2Q14 = kCosTabFfftQ14[dd];
218       if (iSign==1) {
219         sss2Q14 = -sss2Q14;
220       }
221       for (ii=0; ii<4; ii++) {
222         akQx = RexQx[kk];
223         bkQx = ImxQx[kk];
224         RexQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, akQx, 14) - // Q14*Qx>>14 = Qx
225             (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bkQx, 14);
226         ImxQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, akQx, 14) + // Q14*Qx>>14 = Qx
227             (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bkQx, 14);
228 
229 
230         kk += 60;
231       }
232       kk = kk - 220;
233     }
234     // Complexity: 2*(13+5+4*13+2) = 144 for 16x16 muls, but 2*(13+5+4*33+2) = 304 cycles for 16x32 muls
235     kk = kk - 59;
236   }
237   // Complexity: 19*144 = 2736 for 16x16 muls, but 19*304 = 5776 cycles for 16x32 muls
238 
239   // transform for factor of 5
240   kk = 0;
241   ccc2Q14 = kCosTabFfftQ14[96];
242   sss2Q14 = kCosTabFfftQ14[84];
243   if (iSign==1) {
244     sss2Q14 = -sss2Q14;
245   }
246 
247   for (hh=0; hh<4; hh++) {
248     for (ii=0; ii<12; ii++) {
249       k1 = kk + 4;
250       k2 = k1 + 4;
251       k3 = k2 + 4;
252       k4 = k3 + 4;
253 
254       akpQx = RexQx[k1] + RexQx[k4];
255       akmQx = RexQx[k1] - RexQx[k4];
256       bkpQx = ImxQx[k1] + ImxQx[k4];
257       bkmQx = ImxQx[k1] - ImxQx[k4];
258       ajpQx = RexQx[k2] + RexQx[k3];
259       ajmQx = RexQx[k2] - RexQx[k3];
260       bjpQx = ImxQx[k2] + ImxQx[k3];
261       bjmQx = ImxQx[k2] - ImxQx[k3];
262       aaQx = RexQx[kk];
263       bbQx = ImxQx[kk];
264       RexQx[kk] = aaQx + akpQx + ajpQx;
265       ImxQx[kk] = bbQx + bkpQx + bjpQx;
266 
267       akQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, akpQx, 14) +
268           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, ajpQx, 14)  + aaQx;
269       bkQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, bkpQx, 14) +
270           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bjpQx, 14)  + bbQx;
271       ajQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, akmQx, 14) +
272           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, ajmQx, 14);
273       bjQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, bkmQx, 14) +
274           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bjmQx, 14);
275       // 32+4*8=64 or 32+4*20=112
276 
277       RexQx[k1] = akQx - bjQx;
278       RexQx[k4] = akQx + bjQx;
279       ImxQx[k1] = bkQx + ajQx;
280       ImxQx[k4] = bkQx - ajQx;
281 
282       akQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, akpQx, 14)  +
283           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, ajpQx, 14) + aaQx;
284       bkQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bkpQx, 14)  +
285           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc72Q14, bjpQx, 14) + bbQx;
286       ajQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, akmQx, 14) -
287           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, ajmQx, 14);
288       bjQx = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bkmQx, 14) -
289           (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss72Q14, bjmQx, 14);
290       // 8+4*8=40 or 8+4*20=88
291 
292       RexQx[k2] = akQx - bjQx;
293       RexQx[k3] = akQx + bjQx;
294       ImxQx[k2] = bkQx + ajQx;
295       ImxQx[k3] = bkQx - ajQx;
296 
297       kk = k4 + 4;
298     }
299     // Complexity: 12*(64+40+10) = 1368 for 16x16 muls, but 12*(112+88+10) = 2520 cycles for 16x32 muls
300     kk -= 239;
301   }
302   // Complexity: 4*1368 = 5472 for 16x16 muls, but 4*2520 = 10080 cycles for 16x32 muls
303 
304   /* multiply by rotation factor for odd factor 3 or 5 (not for 4)
305      Same code (duplicated) for both ii=2 and ii=3 */
306   kk = 1;
307   ee=0;
308 
309   for (gg=0; gg<3; gg++) {
310     kk += 4;
311     dd = 12 + 12 * gg;
312     ff = 0;
313     for (hh=0; hh<4; hh++) {
314       ff = ff+dd;
315       ee = ff+60;
316       for (ii=0; ii<12; ii++) {
317         akQx = RexQx[kk];
318         bkQx = ImxQx[kk];
319 
320         ccc2Q14 = kCosTabFfftQ14[ff];
321         sss2Q14 = kCosTabFfftQ14[ee];
322 
323         if (iSign==1) {
324           sss2Q14 = -sss2Q14;
325         }
326 
327         RexQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, akQx, 14) -
328             (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, bkQx, 14);
329         ImxQx[kk] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(sss2Q14, akQx, 14) +
330             (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(ccc2Q14, bkQx, 14);
331 
332         kk += 20;
333       }
334       kk = kk - 236;
335       // Complexity: 12*(12+12) = 288 for 16x16 muls, but 12*(12+32) = 528 cycles for 16x32 muls
336     }
337     kk = kk - 19;
338     // Complexity: 4*288+6 for 16x16 muls, but 4*528+6 cycles for 16x32 muls
339   }
340   // Complexity: 3*4*288+6 = 3462 for 16x16 muls, but 3*4*528+6 = 6342 cycles for 16x32 muls
341 
342 
343   // last transform for factor of 4 */
344   for (kk=0; kk<240; kk=kk+4) {
345     k1 = kk + 1;
346     k2 = k1 + 1;
347     k3 = k2 + 1;
348 
349     akpQx = RexQx[kk] + RexQx[k2];
350     akmQx = RexQx[kk] - RexQx[k2];
351     ajpQx = RexQx[k1] + RexQx[k3];
352     ajmQx = RexQx[k1] - RexQx[k3];
353     bkpQx = ImxQx[kk] + ImxQx[k2];
354     bkmQx = ImxQx[kk] - ImxQx[k2];
355     bjpQx = ImxQx[k1] + ImxQx[k3];
356     bjmQx = ImxQx[k1] - ImxQx[k3];
357     RexQx[kk] = akpQx + ajpQx;
358     ImxQx[kk] = bkpQx + bjpQx;
359     ajpQx = akpQx - ajpQx;
360     bjpQx = bkpQx - bjpQx;
361     if (iSign < 0) {
362       akpQx = akmQx + bjmQx;
363       bkpQx = bkmQx - ajmQx;
364       akmQx -= bjmQx;
365       bkmQx += ajmQx;
366     } else {
367       akpQx = akmQx - bjmQx;
368       bkpQx = bkmQx + ajmQx;
369       akmQx += bjmQx;
370       bkmQx -= ajmQx;
371     }
372     RexQx[k1] = akpQx;
373     RexQx[k2] = ajpQx;
374     RexQx[k3] = akmQx;
375     ImxQx[k1] = bkpQx;
376     ImxQx[k2] = bjpQx;
377     ImxQx[k3] = bkmQx;
378   }
379   // Complexity: 60*45 = 2700 for 16x16 muls, but 60*45 = 2700 cycles for 16x32 muls
380 
381   /* permute the results to normal order */
382   for (ii=0; ii<240; ii++) {
383     ReDATAQx[ii]=RexQx[ii];
384     ImDATAQx[ii]=ImxQx[ii];
385   }
386   // Complexity: 240*2=480 cycles
387 
388   for (ii=0; ii<240; ii++) {
389     RexQx[ii]=ReDATAQx[kSortTabFft[ii]];
390     ImxQx[ii]=ImDATAQx[kSortTabFft[ii]];
391   }
392   // Complexity: 240*2*2=960 cycles
393 
394   // Total complexity:
395   //            16x16 16x32
396   // Complexity:   10    10
397   // Complexity:   99   171
398   // Complexity: 2972  3932
399   // Complexity: 2736  5776
400   // Complexity: 5472 10080
401   // Complexity: 3462  6342
402   // Complexity: 2700  2700
403   // Complexity:  480   480
404   // Complexity:  960   960
405   // =======================
406   //            18891 30451
407   //
408   // If this FFT is called 2 time each frame, i.e. 67 times per second, it will correspond to
409   // a C54 complexity of 67*18891/1000000 = 1.27 MIPS with 16x16-muls, and 67*30451/1000000 =
410   // = 2.04 MIPS with 16x32-muls. Note that this routine somtimes is called 6 times during the
411   // encoding of a frame, i.e. the max complexity would be 7/2*1.27 = 4.4 MIPS for the 16x16 mul case.
412 
413 
414   return 0;
415 }
416