1 /*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 *
10 */
11
12 #include "dl/api/omxtypes.h"
13
// Radix-4 butterfly pass over split-format complex data (scalar C version).
//
// Data layout, as established by the indexing below:
//   * in / out : real part at [k], imaginary part at [k + n].
//   * twiddle  : real part at [k], imaginary part at [k + 2 * n].
//
// The pass handles `sub_size` groups. Each group consists of `sub_num / 4`
// butterflies. A butterfly reads four inputs spaced `sub_num / 4` elements
// apart, starting at `in + grp * sub_num + set`, and writes four outputs
// spaced `n / 4` elements apart. The base output position advances by one
// element per butterfly and runs continuously across all groups.
//
// Group 0 combines its inputs directly with no twiddle multiplication
// (presumably because its twiddle factors are unity -- confirm against the
// twiddle table generator). For group g > 0, inputs 1, 2 and 3 are first
// multiplied by the twiddles located at offsets g*s, 2*g*s and 3*g*s from
// `twiddle`, where s = sub_num / 2.
//
// Parameters:
//   in       - source buffer (read only).
//   out      - destination buffer.
//   twiddle  - twiddle-factor table.
//   n        - distance between real and imaginary halves of in/out; also
//              determines the output spacing (n / 4) and the twiddle
//              imaginary offset (2 * n).
//   sub_size - number of groups processed.
//   sub_num  - input stride between consecutive groups; sub_num / 4 is the
//              number of butterflies per group.
void x86SP_FFT_CToC_FC32_Fwd_Radix4_ms(
    const OMX_F32 *in,
    OMX_F32 *out,
    const OMX_F32 *twiddle,
    OMX_INT n,
    OMX_INT sub_size,
    OMX_INT sub_num) {
  const OMX_INT tw_stride = sub_num >> 1;  // twiddle step per group
  const OMX_INT sets = sub_num >> 2;       // butterflies per group
  const OMX_INT quarter = n >> 2;          // spacing of the four outputs
  const OMX_INT tw_im = n << 1;            // offset of twiddle imaginary part
  OMX_F32 *dst = out;                      // running output position

  // Group 0: plain butterflies, no twiddle multiplication.
  for (OMX_INT s = 0; s < sets; ++s, ++dst) {
    const OMX_F32 *src0 = in + s;
    const OMX_F32 *src1 = src0 + sets;
    const OMX_F32 *src2 = src1 + sets;
    const OMX_F32 *src3 = src2 + sets;
    OMX_F32 *dst1 = dst + quarter;
    OMX_F32 *dst2 = dst1 + quarter;
    OMX_F32 *dst3 = dst2 + quarter;

    // All inputs are consumed before any output is stored.
    const OMX_F32 sum02_re = src0[0] + src2[0];
    const OMX_F32 sum02_im = src0[n] + src2[n];
    const OMX_F32 dif02_re = src0[0] - src2[0];
    const OMX_F32 dif02_im = src0[n] - src2[n];
    const OMX_F32 sum13_re = src1[0] + src3[0];
    const OMX_F32 sum13_im = src1[n] + src3[n];
    const OMX_F32 dif13_re = src1[0] - src3[0];
    const OMX_F32 dif13_im = src1[n] - src3[n];

    // Output 0: (x0 + x2) + (x1 + x3)
    dst[0] = sum02_re + sum13_re;
    dst[n] = sum02_im + sum13_im;

    // Output 2: (x0 + x2) - (x1 + x3)
    dst2[0] = sum02_re - sum13_re;
    dst2[n] = sum02_im - sum13_im;

    // Output 3: real = Re(a) - Im(b), imag = Im(a) + Re(b),
    // with a = x0 - x2 and b = x1 - x3.
    dst3[0] = dif02_re - dif13_im;
    dst3[n] = dif02_im + dif13_re;

    // Output 1: real = Re(a) + Im(b), imag = Im(a) - Re(b).
    dst1[0] = dif02_re + dif13_im;
    dst1[n] = dif02_im - dif13_re;
  }

  // Groups 1 .. sub_size - 1: inputs 1..3 are twiddled before the butterfly.
  for (OMX_INT g = 1; g < sub_size; ++g) {
    const OMX_INT tw_off = g * tw_stride;
    const OMX_F32 *w1 = twiddle + tw_off;
    const OMX_F32 *w2 = w1 + tw_off;
    const OMX_F32 *w3 = w2 + tw_off;

    for (OMX_INT s = 0; s < sets; ++s, ++dst) {
      const OMX_F32 *src0 = in + s + g * sub_num;
      const OMX_F32 *src1 = src0 + sets;
      const OMX_F32 *src2 = src1 + sets;
      const OMX_F32 *src3 = src2 + sets;
      OMX_F32 *dst1 = dst + quarter;
      OMX_F32 *dst2 = dst1 + quarter;
      OMX_F32 *dst3 = dst2 + quarter;

      // Complex products p_k = w_k * x_k for k = 1, 2, 3.
      const OMX_F32 p1_re = w1[0] * src1[0] - w1[tw_im] * src1[n];
      const OMX_F32 p1_im = w1[0] * src1[n] + w1[tw_im] * src1[0];
      const OMX_F32 p2_re = w2[0] * src2[0] - w2[tw_im] * src2[n];
      const OMX_F32 p2_im = w2[0] * src2[n] + w2[tw_im] * src2[0];
      const OMX_F32 p3_re = w3[0] * src3[0] - w3[tw_im] * src3[n];
      const OMX_F32 p3_im = w3[0] * src3[n] + w3[tw_im] * src3[0];

      // First butterfly stage.
      const OMX_F32 sum02_re = src0[0] + p2_re;
      const OMX_F32 sum02_im = src0[n] + p2_im;
      const OMX_F32 dif02_re = src0[0] - p2_re;
      const OMX_F32 dif02_im = src0[n] - p2_im;
      const OMX_F32 sum13_re = p1_re + p3_re;
      const OMX_F32 sum13_im = p1_im + p3_im;
      const OMX_F32 dif13_re = p1_re - p3_re;
      const OMX_F32 dif13_im = p1_im - p3_im;

      // Output 0: (x0 + p2) + (p1 + p3)
      dst[0] = sum02_re + sum13_re;
      dst[n] = sum02_im + sum13_im;

      // Output 2: (x0 + p2) - (p1 + p3)
      dst2[0] = sum02_re - sum13_re;
      dst2[n] = sum02_im - sum13_im;

      // Output 1: real = Re(a) + Im(b), imag = Im(a) - Re(b),
      // with a = x0 - p2 and b = p1 - p3.
      dst1[0] = dif02_re + dif13_im;
      dst1[n] = dif02_im - dif13_re;

      // Output 3: real = Re(a) - Im(b), imag = Im(a) + Re(b).
      dst3[0] = dif02_re - dif13_im;
      dst3[n] = dif02_im + dif13_re;
    }
  }
}
150