1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 /*
19
20 Pathname: ./src/fft_rx4_short.c
21 Funtions: fft_rx4_short
22
23 ------------------------------------------------------------------------------
24 REVISION HISTORY
25
26 Description:
27 (1) Eliminated search for max in the main loop.
28 (2) Simplified the function by eliminating different conditions
29 for exp.
30 (3) Reduced precision on w_64rx4 from Q15 to Q12, so now the
31 input can be as high as 1.0 and saturation will not occurre
32 because the accumulation times the new Q12 format will never
33 exceed 31 bits.
34
35 Description:
36 (1) Added comment to explain max search elimination and
37 Q format during multiplications
38 (2) Increased down shift from 1 to 2, to ensure that 32-bit
39 numbers will not overflow when 2 consecutive adds are done
40 This was found during code review.
41
42 Who: Date:
43 Description:
44
45 ------------------------------------------------------------------------------
46 INPUT AND OUTPUT DEFINITIONS
47
48 Inputs:
49 Data = Input complex vector, arranged in the following order:
50 real, imag, real, imag...
51 This is a complex vector whose elements (real and Imag) are
52 Int32.
53 type Int32 *
54
55 peak_value = Input, peak value of the input vector
56 Output, peak value of the resulting vector
57 type Int32 *
58
59 Local Stores/Buffers/Pointers Needed:
60 None
61
62 Global Stores/Buffers/Pointers Needed:
63 None
64
65 Outputs:
66 exponent returns a shift to compensate the scaling introduced by
67 overflow protection
68
69 Pointers and Buffers Modified:
70 calculation are done in-place and returned in Data
71
72 Local Stores Modified:
73 None
74
75 Global Stores Modified:
76 None
77
78 ------------------------------------------------------------------------------
79 FUNCTION DESCRIPTION
80
81 Fast Fourier Transform, radix 4 with Decimation in Frequency and block
82 floating point arithmetic.
83 The radix-4 FFT simply divides the FFT into four smaller FFTs. Each of
84 the smaller FFTs is then further divided into smaller ones and so on.
85 It consists of log 4 N stages and each stage consists of N/4 dragonflies.
86
87 An FFT is nothing but a bundle of multiplications and summations which
88 may overflow during calculations.
89
90
91 This routine uses a scheme to test and scale the result output from
92 each FFT stage in order to fix the accumulation overflow.
93
94 The Input Data should be in Q13 format to get the highest precision.
95 At the end of each dragonfly calculation, a test for possible bit growth
96 is made, if bit growth is possible the Data is scale down back to Q13.
97
98
99 ------------------------------------------------------------------------------
100 REQUIREMENTS
101
102 This function should provide a fixed point FFT for an input array
103 of size 64.
104
105 ------------------------------------------------------------------------------
106 REFERENCES
107
108 [1] Advance Digital Signal Processing, J. Proakis, C. Rader, F. Ling,
109 C. Nikias, Macmillan Pub. Co.
110
111 ------------------------------------------------------------------------------
112 PSEUDO-CODE
113
114
115 MODIFY( x[] )
116 RETURN( exponent )
117
118 ------------------------------------------------------------------------------
119 RESOURCES USED
120 When the code is written for a specific target processor the
121 the resources used should be documented below.
122
123 STACK USAGE: [stack count for this module] + [variable to represent
124 stack usage for each subroutine called]
125
126 where: [stack usage variable] = stack usage for [subroutine
127 name] (see [filename].ext)
128
129 DATA MEMORY USED: x words
130
131 PROGRAM MEMORY USED: x words
132
133 CLOCK CYCLES: [cycle count equation for this module] + [variable
134 used to represent cycle count for each subroutine
135 called]
136
137 where: [cycle count variable] = cycle count for [subroutine
138 name] (see [filename].ext)
139
140 ------------------------------------------------------------------------------
141 */
142 /*----------------------------------------------------------------------------
143 ; INCLUDES
144 ----------------------------------------------------------------------------*/
145
146 #include "pv_audio_type_defs.h"
147 #include "fft_rx4.h"
148 #include "pv_normalize.h"
149 #include "fxp_mul32.h"
150
151 /*----------------------------------------------------------------------------
152 ; MACROS
153 ; Define module specific macros here
154 ----------------------------------------------------------------------------*/
155
156 /*----------------------------------------------------------------------------
157 ; DEFINES
158 ; Include all pre-processor statements here. Include conditional
159 ; compile variables also.
160 ----------------------------------------------------------------------------*/
161
162 /*----------------------------------------------------------------------------
163 ; LOCAL FUNCTION DEFINITIONS
164 ; Function Prototype declaration
165 ----------------------------------------------------------------------------*/
166
167 /*----------------------------------------------------------------------------
168 ; LOCAL VARIABLE DEFINITIONS
169 ; Variable declaration - defined here and used outside this module
170 ----------------------------------------------------------------------------*/
171
172 /*----------------------------------------------------------------------------
173 ; EXTERNAL FUNCTION REFERENCES
174 ; Declare functions defined elsewhere and referenced in this module
175 ----------------------------------------------------------------------------*/
176
177 /*----------------------------------------------------------------------------
178 ; EXTERNAL VARIABLES REFERENCES
179 ; Declare variables used in this module but defined elsewhere
180 ----------------------------------------------------------------------------*/
181
182 /*----------------------------------------------------------------------------
183 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
184 ; Declare variables used in this module but defined elsewhere
185 ----------------------------------------------------------------------------*/
186
187 /*----------------------------------------------------------------------------
188 ; FUNCTION CODE
189 ----------------------------------------------------------------------------*/
190
fft_rx4_short(Int32 Data[],Int32 * peak_value)191 Int fft_rx4_short(
192 Int32 Data[],
193 Int32 *peak_value)
194
195 {
196 Int n1;
197 Int n2;
198 Int n3;
199 Int j;
200 Int k;
201 Int i;
202 Int32 exp_jw1;
203 Int32 exp_jw2;
204 Int32 exp_jw3;
205
206
207 Int32 t1;
208 Int32 t2;
209 Int32 r1;
210 Int32 r2;
211 Int32 r3;
212 Int32 s1;
213 Int32 s2;
214 Int32 s3;
215
216 Int32 *pData1;
217 Int32 *pData2;
218 Int32 *pData3;
219 Int32 *pData4;
220 const Int32 *pw;
221 Int32 temp1;
222 Int32 temp2;
223 Int32 temp3;
224 Int32 temp4;
225 Int32 max;
226 Int exp;
227 Int exponent = 0;
228 Int shift;
229
230
231 max = *peak_value;
232 exp = 0;
233
234 if (max > 0x008000)
235 {
236 exp = 8 - pv_normalize(max); /* use 24 bits */
237
238 exponent = exp; /* keeps track of # of shifts */
239
240 }
241
242 n2 = FFT_RX4_SHORT;
243
244 pw = W_64rx4;
245
246
247 /* shift down to avoid possible overflow in first pass of the loop */
248 shift = 2;
249
250 for (k = FFT_RX4_SHORT; k > 4; k >>= 2)
251 {
252
253 n1 = n2;
254 n2 >>= 2;
255 n3 = n1 >> 1;
256
257 exp -= 2;
258
259 for (i = 0; i < FFT_RX4_SHORT; i += n1)
260 {
261 pData1 = &Data[ i<<1];
262 pData3 = pData1 + n3;
263 pData2 = pData1 + n1;
264 pData4 = pData3 + n1;
265
266 temp1 = *(pData1);
267 temp2 = *(pData2);
268 temp1 >>= shift;
269 temp2 >>= shift;
270
271 r1 = temp1 + temp2;
272 r2 = temp1 - temp2;
273
274 temp3 = *(pData3++);
275 temp4 = *(pData4++);
276 temp3 >>= shift;
277 temp4 >>= shift;
278
279 t1 = temp3 + temp4;
280 t2 = temp3 - temp4;
281
282 *(pData1++) = (r1 + t1) >> exp;
283 *(pData2++) = (r1 - t1) >> exp;
284
285 temp1 = *pData1;
286 temp2 = *pData2;
287 temp1 >>= shift;
288 temp2 >>= shift;
289
290 s1 = temp1 + temp2;
291 s2 = temp1 - temp2;
292
293 temp3 = *pData3;
294 temp4 = *pData4;
295 temp3 >>= shift;
296 temp4 >>= shift;
297
298 t1 = temp3 + temp4;
299 r1 = temp3 - temp4;
300
301 *pData1 = (s1 + t1) >> exp;
302 *pData2 = (s1 - t1) >> exp;
303
304 *pData4-- = (s2 + t2) >> exp;
305 *pData4 = (r2 - r1) >> exp;
306
307 *pData3-- = (s2 - t2) >> exp;
308 *pData3 = (r2 + r1) >> exp;
309
310
311 } /* i */
312
313 for (j = 1; j < n2; j++)
314 {
315 exp_jw1 = *pw++;
316 exp_jw2 = *pw++;
317 exp_jw3 = *pw++;
318
319
320 for (i = j; i < FFT_RX4_SHORT; i += n1)
321 {
322 pData1 = &Data[ i<<1];
323 pData3 = pData1 + n3;
324 pData2 = pData1 + n1;
325 pData4 = pData3 + n1;
326
327 temp1 = *(pData1);
328 temp2 = *(pData2++);
329 temp1 >>= shift;
330 temp2 >>= shift;
331
332 r1 = temp1 + temp2;
333 r2 = temp1 - temp2;
334 temp3 = *(pData3++);
335 temp4 = *(pData4++);
336 temp3 >>= shift;
337 temp4 >>= shift;
338
339 t1 = temp3 + temp4;
340 t2 = temp3 - temp4;
341
342 *(pData1++) = (r1 + t1) >> exp;
343 r1 = (r1 - t1) >> exp;
344
345 temp1 = *pData1;
346 temp2 = *pData2;
347 temp1 >>= shift;
348 temp2 >>= shift;
349
350 s1 = temp1 + temp2;
351 s2 = temp1 - temp2;
352
353 s3 = (s2 + t2) >> exp;
354 s2 = (s2 - t2) >> exp;
355
356 temp3 = *pData3;
357 temp4 = *pData4 ;
358 temp3 >>= shift;
359 temp4 >>= shift;
360
361 t1 = temp3 + temp4;
362 t2 = temp3 - temp4;
363
364 *pData1 = (s1 + t1) >> exp;
365 s1 = (s1 - t1) >> exp;
366
367
368 *pData2-- = cmplx_mul32_by_16(s1, -r1, exp_jw2) << 1;
369 *pData2 = cmplx_mul32_by_16(r1, s1, exp_jw2) << 1;
370
371 r3 = ((r2 - t2) >> exp);
372 r2 = ((r2 + t2) >> exp);
373
374 *pData3-- = cmplx_mul32_by_16(s2, -r2, exp_jw1) << 1;
375 *pData3 = cmplx_mul32_by_16(r2, s2, exp_jw1) << 1;
376
377 *pData4-- = cmplx_mul32_by_16(s3, -r3, exp_jw3) << 1;
378 *pData4 = cmplx_mul32_by_16(r3, s3, exp_jw3) << 1;
379
380 } /* i */
381
382 } /* j */
383
384 /*
385 * this will reset exp and shift to zero for the second pass of the
386 * loop
387 */
388 exp = 2;
389 shift = 0;
390
391 } /* k */
392
393
394 max = 0;
395
396 pData1 = Data - 7;
397
398 for (i = ONE_FOURTH_FFT_RX4_SHORT; i != 0 ; i--)
399 {
400 pData1 += 7;
401
402 pData3 = pData1 + 2;
403 pData2 = pData1 + 4;
404 pData4 = pData1 + 6;
405
406 temp1 = *pData1;
407 temp2 = *pData2++;
408
409 r1 = temp1 + temp2;
410 r2 = temp1 - temp2;
411
412 temp1 = *pData3++;
413 temp2 = *pData4++;
414
415 t1 = temp1 + temp2;
416 t2 = temp1 - temp2;
417
418 temp1 = (r1 + t1);
419 r1 = (r1 - t1);
420 *(pData1++) = temp1;
421 max |= (temp1 >> 31) ^ temp1;
422
423
424
425 temp1 = *pData1;
426 temp2 = *pData2;
427
428 s1 = temp1 + temp2;
429 s2 = temp1 - temp2;
430
431 s3 = (s2 + t2);
432 s2 = (s2 - t2);
433
434 temp1 = *pData3;
435 temp2 = *pData4;
436
437 t1 = temp1 + temp2;
438 t2 = temp1 - temp2;
439
440 temp1 = (s1 + t1);
441 temp2 = (s1 - t1);
442 *pData1 = temp1;
443 *pData2-- = temp2;
444 max |= (temp1 >> 31) ^ temp1;
445 max |= (temp2 >> 31) ^ temp2;
446
447 *pData2 = r1;
448 *pData3-- = s2;
449 *pData4-- = s3;
450 max |= (r1 >> 31) ^ r1;
451 max |= (s2 >> 31) ^ s2;
452 max |= (s3 >> 31) ^ s3;
453
454 temp1 = (r2 - t2);
455 temp2 = (r2 + t2);
456 *pData4 = temp1;
457 *pData3 = temp2;
458 max |= (temp1 >> 31) ^ temp1;
459 max |= (temp2 >> 31) ^ temp2;
460
461 } /* i */
462
463 *peak_value = max;
464
465
466 return (exponent);
467
468 }
469