• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /*
19 
20  Pathname: ./src/fft_rx4_short.c
21  Funtions: fft_rx4_short
22 
23 ------------------------------------------------------------------------------
24  REVISION HISTORY
25 
26  Description:
27             (1) Eliminated search for max in the main loop.
28             (2) Simplified the function by eliminating different conditions
29                 for exp.
30             (3) Reduced precision on w_64rx4 from Q15 to Q12, so now the
31                 input can be as high as 1.0 and saturation will not occurre
32                 because the accumulation times the new Q12 format will never
33                 exceed 31 bits.
34 
35  Description:
36             (1) Added comment to explain max search elimination and
37                 Q format during multiplications
38             (2) Increased down shift from 1 to 2, to ensure that 32-bit
39                 numbers will not overflow when 2 consecutive adds are done
40                 This was found during code review.
41 
42  Who:                                   Date:
43  Description:
44 
45 ------------------------------------------------------------------------------
46  INPUT AND OUTPUT DEFINITIONS
47 
48  Inputs:
49     Data       =  Input complex vector, arranged in the following order:
50                   real, imag, real, imag...
51                   This is a complex vector whose elements (real and Imag) are
52                   Int32.
53                   type Int32 *
54 
55     peak_value =  Input,  peak value of the input vector
56                   Output,  peak value of the resulting vector
57                   type Int32 *
58 
59  Local Stores/Buffers/Pointers Needed:
60     None
61 
62  Global Stores/Buffers/Pointers Needed:
63     None
64 
65  Outputs:
66     exponent returns a shift to compensate the scaling introduced by
67     overflow protection
68 
69  Pointers and Buffers Modified:
70     calculation are done in-place and returned in Data
71 
72  Local Stores Modified:
73     None
74 
75  Global Stores Modified:
76     None
77 
78 ------------------------------------------------------------------------------
79  FUNCTION DESCRIPTION
80 
81     Fast Fourier Transform, radix 4 with Decimation in Frequency and block
82     floating point arithmetic.
83     The radix-4 FFT  simply divides the FFT into four smaller FFTs. Each of
84     the smaller FFTs is then further divided into smaller ones and so on.
85     It consists of log 4 N stages and each stage consists of N/4 dragonflies.
86 
87     An FFT is nothing but a bundle of multiplications and summations which
88     may overflow during calculations.
89 
90 
91     This routine uses a scheme to test and scale the result output from
92     each FFT stage in order to fix the accumulation overflow.
93 
94     The Input Data should be in Q13 format to get the highest precision.
95     At the end of each dragonfly calculation, a test for possible bit growth
96     is made, if bit growth is possible the Data is scale down back to Q13.
97 
98 
99 ------------------------------------------------------------------------------
100  REQUIREMENTS
101 
102     This function should provide a fixed point FFT for an input array
103     of size 64.
104 
105 ------------------------------------------------------------------------------
106  REFERENCES
107 
108     [1] Advance Digital Signal Processing, J. Proakis, C. Rader, F. Ling,
109         C. Nikias, Macmillan Pub. Co.
110 
111 ------------------------------------------------------------------------------
112  PSEUDO-CODE
113 
114 
115    MODIFY( x[] )
116    RETURN( exponent )
117 
118 ------------------------------------------------------------------------------
119  RESOURCES USED
120    When the code is written for a specific target processor the
121      the resources used should be documented below.
122 
123  STACK USAGE: [stack count for this module] + [variable to represent
124           stack usage for each subroutine called]
125 
126      where: [stack usage variable] = stack usage for [subroutine
127          name] (see [filename].ext)
128 
129  DATA MEMORY USED: x words
130 
131  PROGRAM MEMORY USED: x words
132 
133  CLOCK CYCLES: [cycle count equation for this module] + [variable
134            used to represent cycle count for each subroutine
135            called]
136 
137      where: [cycle count variable] = cycle count for [subroutine
138         name] (see [filename].ext)
139 
140 ------------------------------------------------------------------------------
141 */
142 /*----------------------------------------------------------------------------
143 ; INCLUDES
144 ----------------------------------------------------------------------------*/
145 
146 #include "pv_audio_type_defs.h"
147 #include "fft_rx4.h"
148 #include "pv_normalize.h"
149 #include "fxp_mul32.h"
150 
151 /*----------------------------------------------------------------------------
152 ; MACROS
153 ; Define module specific macros here
154 ----------------------------------------------------------------------------*/
155 
156 /*----------------------------------------------------------------------------
157 ; DEFINES
158 ; Include all pre-processor statements here. Include conditional
159 ; compile variables also.
160 ----------------------------------------------------------------------------*/
161 
162 /*----------------------------------------------------------------------------
163 ; LOCAL FUNCTION DEFINITIONS
164 ; Function Prototype declaration
165 ----------------------------------------------------------------------------*/
166 
167 /*----------------------------------------------------------------------------
168 ; LOCAL VARIABLE DEFINITIONS
169 ; Variable declaration - defined here and used outside this module
170 ----------------------------------------------------------------------------*/
171 
172 /*----------------------------------------------------------------------------
173 ; EXTERNAL FUNCTION REFERENCES
174 ; Declare functions defined elsewhere and referenced in this module
175 ----------------------------------------------------------------------------*/
176 
177 /*----------------------------------------------------------------------------
178 ; EXTERNAL VARIABLES REFERENCES
179 ; Declare variables used in this module but defined elsewhere
180 ----------------------------------------------------------------------------*/
181 
182 /*----------------------------------------------------------------------------
183 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
184 ; Declare variables used in this module but defined elsewhere
185 ----------------------------------------------------------------------------*/
186 
187 /*----------------------------------------------------------------------------
188 ; FUNCTION CODE
189 ----------------------------------------------------------------------------*/
190 
fft_rx4_short(Int32 Data[],Int32 * peak_value)191 Int fft_rx4_short(
192     Int32      Data[],
193     Int32      *peak_value)
194 
195 {
196     Int     n1;
197     Int     n2;
198     Int     n3;
199     Int     j;
200     Int     k;
201     Int     i;
202     Int32   exp_jw1;
203     Int32   exp_jw2;
204     Int32   exp_jw3;
205 
206 
207     Int32   t1;
208     Int32   t2;
209     Int32   r1;
210     Int32   r2;
211     Int32   r3;
212     Int32   s1;
213     Int32   s2;
214     Int32   s3;
215 
216     Int32   *pData1;
217     Int32   *pData2;
218     Int32   *pData3;
219     Int32   *pData4;
220     const Int32  *pw;
221     Int32   temp1;
222     Int32   temp2;
223     Int32   temp3;
224     Int32   temp4;
225     Int32   max;
226     Int     exp;
227     Int     exponent = 0;
228     Int     shift;
229 
230 
231     max = *peak_value;
232     exp = 0;
233 
234     if (max > 0x008000)
235     {
236         exp = 8 - pv_normalize(max);   /* use 24 bits  */
237 
238         exponent = exp;        /* keeps track of # of shifts */
239 
240     }
241 
242     n2 = FFT_RX4_SHORT;
243 
244     pw = W_64rx4;
245 
246 
247     /* shift down to avoid possible overflow in first pass of the loop */
248     shift = 2;
249 
250     for (k = FFT_RX4_SHORT; k > 4; k >>= 2)
251     {
252 
253         n1 = n2;
254         n2 >>= 2;
255         n3 = n1 >> 1;
256 
257         exp -= 2;
258 
259         for (i = 0; i < FFT_RX4_SHORT; i += n1)
260         {
261             pData1 = &Data[ i<<1];
262             pData3 = pData1 + n3;
263             pData2 = pData1 + n1;
264             pData4 = pData3 + n1;
265 
266             temp1   = *(pData1);
267             temp2   = *(pData2);
268             temp1   >>= shift;
269             temp2   >>= shift;
270 
271             r1      = temp1 + temp2;
272             r2      = temp1 - temp2;
273 
274             temp3   = *(pData3++);
275             temp4   = *(pData4++);
276             temp3   >>= shift;
277             temp4   >>= shift;
278 
279             t1      = temp3 + temp4;
280             t2      = temp3 - temp4;
281 
282             *(pData1++) = (r1 + t1) >> exp;
283             *(pData2++) = (r1 - t1) >> exp;
284 
285             temp1   = *pData1;
286             temp2   = *pData2;
287             temp1   >>= shift;
288             temp2   >>= shift;
289 
290             s1      = temp1 + temp2;
291             s2      = temp1 - temp2;
292 
293             temp3   = *pData3;
294             temp4   = *pData4;
295             temp3   >>= shift;
296             temp4   >>= shift;
297 
298             t1      = temp3 + temp4;
299             r1      = temp3 - temp4;
300 
301             *pData1   = (s1 + t1) >> exp;
302             *pData2   = (s1 - t1) >> exp;
303 
304             *pData4--    = (s2 + t2) >> exp;
305             *pData4      = (r2 - r1) >> exp;
306 
307             *pData3--    = (s2 - t2) >> exp;
308             *pData3      = (r2 + r1) >> exp;
309 
310 
311         }  /* i */
312 
313         for (j = 1; j < n2; j++)
314         {
315             exp_jw1 = *pw++;
316             exp_jw2 = *pw++;
317             exp_jw3 = *pw++;
318 
319 
320             for (i = j; i < FFT_RX4_SHORT; i += n1)
321             {
322                 pData1 = &Data[ i<<1];
323                 pData3 = pData1 + n3;
324                 pData2 = pData1 + n1;
325                 pData4 = pData3 + n1;
326 
327                 temp1   = *(pData1);
328                 temp2   = *(pData2++);
329                 temp1   >>= shift;
330                 temp2   >>= shift;
331 
332                 r1      = temp1 + temp2;
333                 r2      = temp1 - temp2;
334                 temp3   = *(pData3++);
335                 temp4   = *(pData4++);
336                 temp3   >>= shift;
337                 temp4   >>= shift;
338 
339                 t1      = temp3 + temp4;
340                 t2      = temp3 - temp4;
341 
342                 *(pData1++) = (r1 + t1) >> exp;
343                 r1          = (r1 - t1) >> exp;
344 
345                 temp1   = *pData1;
346                 temp2   = *pData2;
347                 temp1   >>= shift;
348                 temp2   >>= shift;
349 
350                 s1      = temp1 + temp2;
351                 s2      = temp1 - temp2;
352 
353                 s3      = (s2 + t2) >> exp;
354                 s2      = (s2 - t2) >> exp;
355 
356                 temp3   = *pData3;
357                 temp4   = *pData4 ;
358                 temp3   >>= shift;
359                 temp4   >>= shift;
360 
361                 t1      = temp3 + temp4;
362                 t2      = temp3 - temp4;
363 
364                 *pData1  = (s1 + t1) >> exp;
365                 s1       = (s1 - t1) >> exp;
366 
367 
368                 *pData2--  = cmplx_mul32_by_16(s1, -r1, exp_jw2) << 1;
369                 *pData2    = cmplx_mul32_by_16(r1,  s1, exp_jw2) << 1;
370 
371                 r3       = ((r2 - t2) >> exp);
372                 r2       = ((r2 + t2) >> exp);
373 
374                 *pData3--  = cmplx_mul32_by_16(s2, -r2, exp_jw1) << 1;
375                 *pData3    = cmplx_mul32_by_16(r2,  s2, exp_jw1) << 1;
376 
377                 *pData4--  = cmplx_mul32_by_16(s3, -r3, exp_jw3) << 1;
378                 *pData4    = cmplx_mul32_by_16(r3,  s3, exp_jw3) << 1;
379 
380             }  /* i */
381 
382         }  /*  j */
383 
384         /*
385          *  this will reset exp and shift to zero for the second pass of the
386          *  loop
387          */
388         exp   = 2;
389         shift = 0;
390 
391     } /* k */
392 
393 
394     max = 0;
395 
396     pData1 = Data - 7;
397 
398     for (i = ONE_FOURTH_FFT_RX4_SHORT; i != 0 ; i--)
399     {
400         pData1 += 7;
401 
402         pData3 = pData1 + 2;
403         pData2 = pData1 + 4;
404         pData4 = pData1 + 6;
405 
406         temp1   = *pData1;
407         temp2   = *pData2++;
408 
409         r1      = temp1 + temp2;
410         r2      = temp1 - temp2;
411 
412         temp1   = *pData3++;
413         temp2   = *pData4++;
414 
415         t1      = temp1 + temp2;
416         t2      = temp1 - temp2;
417 
418         temp1       = (r1 + t1);
419         r1          = (r1 - t1);
420         *(pData1++) = temp1;
421         max        |= (temp1 >> 31) ^ temp1;
422 
423 
424 
425         temp1   = *pData1;
426         temp2   = *pData2;
427 
428         s1      = temp1 + temp2;
429         s2      = temp1 - temp2;
430 
431         s3      = (s2 + t2);
432         s2      = (s2 - t2);
433 
434         temp1   = *pData3;
435         temp2   = *pData4;
436 
437         t1      = temp1 + temp2;
438         t2      = temp1 - temp2;
439 
440         temp1      = (s1 + t1);
441         temp2      = (s1 - t1);
442         *pData1    = temp1;
443         *pData2--  = temp2;
444         max       |= (temp1 >> 31) ^ temp1;
445         max       |= (temp2 >> 31) ^ temp2;
446 
447         *pData2    = r1;
448         *pData3--  = s2;
449         *pData4--  = s3;
450         max       |= (r1 >> 31) ^ r1;
451         max       |= (s2 >> 31) ^ s2;
452         max       |= (s3 >> 31) ^ s3;
453 
454         temp1      = (r2 - t2);
455         temp2      = (r2 + t2);
456         *pData4    = temp1;
457         *pData3    = temp2;
458         max       |= (temp1 >> 31) ^ temp1;
459         max       |= (temp2 >> 31) ^ temp2;
460 
461     }  /* i */
462 
463     *peak_value = max;
464 
465 
466     return (exponent);
467 
468 }
469