1 /* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6 #include <stdlib.h>
7 #include "eq2.h"
8
/* State of a two-channel equalizer: one chain of biquad filters per channel. */
struct eq2 {
	/* n[c] counts the biquads appended so far to channel c. */
	int n[2];
	/* Filter slots, indexed as [slot][channel]. */
	struct biquad biquad[MAX_BIQUADS_PER_EQ2][2];
};
13
eq2_new()14 struct eq2 *eq2_new()
15 {
16 struct eq2 *eq2 = (struct eq2 *)calloc(1, sizeof(*eq2));
17 int i, j;
18
19 /* Initialize all biquads to identity filter, so if two channels have
20 * different numbers of biquads, it still works. */
21 for (i = 0; i < MAX_BIQUADS_PER_EQ2; i++)
22 for (j = 0; j < 2; j++)
23 biquad_set(&eq2->biquad[i][j], BQ_NONE, 0, 0, 0);
24
25 return eq2;
26 }
27
/* Releases an equalizer.
 *
 * Args:
 *    eq2 - The equalizer to release. The pointer is handed straight to
 *          free(), so NULL is accepted and does nothing.
 */
void eq2_free(struct eq2 *eq2)
{
	free(eq2);
}
32
eq2_append_biquad(struct eq2 * eq2,int channel,enum biquad_type type,float freq,float Q,float gain)33 int eq2_append_biquad(struct eq2 *eq2, int channel, enum biquad_type type,
34 float freq, float Q, float gain)
35 {
36 if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2)
37 return -1;
38 biquad_set(&eq2->biquad[eq2->n[channel]++][channel], type, freq, Q,
39 gain);
40 return 0;
41 }
42
eq2_append_biquad_direct(struct eq2 * eq2,int channel,const struct biquad * biquad)43 int eq2_append_biquad_direct(struct eq2 *eq2, int channel,
44 const struct biquad *biquad)
45 {
46 if (eq2->n[channel] >= MAX_BIQUADS_PER_EQ2)
47 return -1;
48 eq2->biquad[eq2->n[channel]++][channel] = *biquad;
49 return 0;
50 }
51
eq2_process_one(struct biquad (* bq)[2],float * data0,float * data1,int count)52 static inline void eq2_process_one(struct biquad (*bq)[2], float *data0,
53 float *data1, int count)
54 {
55 struct biquad *qL = &bq[0][0];
56 struct biquad *qR = &bq[0][1];
57
58 float x1L = qL->x1;
59 float x2L = qL->x2;
60 float y1L = qL->y1;
61 float y2L = qL->y2;
62 float b0L = qL->b0;
63 float b1L = qL->b1;
64 float b2L = qL->b2;
65 float a1L = qL->a1;
66 float a2L = qL->a2;
67
68 float x1R = qR->x1;
69 float x2R = qR->x2;
70 float y1R = qR->y1;
71 float y2R = qR->y2;
72 float b0R = qR->b0;
73 float b1R = qR->b1;
74 float b2R = qR->b2;
75 float a1R = qR->a1;
76 float a2R = qR->a2;
77
78 int j;
79 for (j = 0; j < count; j++) {
80 float xL = data0[j];
81 float xR = data1[j];
82
83 float yL = b0L * xL + b1L * x1L + b2L * x2L - a1L * y1L -
84 a2L * y2L;
85 x2L = x1L;
86 x1L = xL;
87 y2L = y1L;
88 y1L = yL;
89
90 float yR = b0R * xR + b1R * x1R + b2R * x2R - a1R * y1R -
91 a2R * y2R;
92 x2R = x1R;
93 x1R = xR;
94 y2R = y1R;
95 y1R = yR;
96
97 data0[j] = yL;
98 data1[j] = yR;
99 }
100
101 qL->x1 = x1L;
102 qL->x2 = x2L;
103 qL->y1 = y1L;
104 qL->y2 = y2L;
105 qR->x1 = x1R;
106 qR->x2 = x2R;
107 qR->y1 = y1R;
108 qR->y2 = y2R;
109 }
110
111 #ifdef __ARM_NEON__
112 #include <arm_neon.h>
/* Runs two consecutive biquad stages over both channels, in place, using
 * NEON inline assembly.
 *
 * Args:
 *    bq - bq[0] is the first stage ("q") and bq[1] the second stage ("r");
 *         index [..][0] filters data0 and [..][1] filters data1.
 *    data0, data1 - Sample buffers for the two channels, overwritten with
 *                   the output of the second stage.
 *    count - Number of samples per buffer. The loop decrements count before
 *            testing it, so the body always runs at least once; callers
 *            must pass a positive count.
 *
 * Every two-lane vector holds the data0 channel in lane 0 and the data1
 * channel in lane 1. The output of the first stage is the input of the
 * second, so y1/y2 serve both as q's output history and as r's input
 * history; r's own x1/x2 fields are neither read nor written here.
 */
static inline void eq2_process_two_neon(struct biquad (*bq)[2], float *data0,
					float *data1, int count)
{
	struct biquad *qL = &bq[0][0];
	struct biquad *rL = &bq[1][0];
	struct biquad *qR = &bq[0][1];
	struct biquad *rR = &bq[1][1];

	/* First stage: input history, output history and coefficients. */
	float32x2_t x1 = { qL->x1, qR->x1 };
	float32x2_t x2 = { qL->x2, qR->x2 };
	float32x2_t y1 = { qL->y1, qR->y1 };
	float32x2_t y2 = { qL->y2, qR->y2 };
	float32x2_t qb0 = { qL->b0, qR->b0 };
	float32x2_t qb1 = { qL->b1, qR->b1 };
	float32x2_t qb2 = { qL->b2, qR->b2 };
	float32x2_t qa1 = { qL->a1, qR->a1 };
	float32x2_t qa2 = { qL->a2, qR->a2 };

	/* Second stage: output history and coefficients. */
	float32x2_t z1 = { rL->y1, rR->y1 };
	float32x2_t z2 = { rL->y2, rR->y2 };
	float32x2_t rb0 = { rL->b0, rR->b0 };
	float32x2_t rb1 = { rL->b1, rR->b1 };
	float32x2_t rb2 = { rL->b2, rR->b2 };
	float32x2_t ra1 = { rL->a1, rR->a1 };
	float32x2_t ra2 = { rL->a2, rR->a2 };

	// clang-format off
	__asm__ __volatile__(
		/* d0 = x, d1 = y, d2 = z */
		"1: \n"
		/* Start y = qb1 * x1 while the new input pair is loaded. */
		"vmul.f32 d1, %P[qb1], %P[x1] \n"
		"vld1.32 d0[0], [%[data0]] \n"
		"vld1.32 d0[1], [%[data1]] \n"
		/* Decrement the counter; the bne at the bottom branches on
		 * the flags set here. */
		"subs %[count], #1 \n"
		/* Start z = rb1 * y1 (y1 is the second stage's previous
		 * input). */
		"vmul.f32 d2, %P[rb1], %P[y1] \n"
		/* y += qb0 * x + qb2 * x2, then shift the input history. */
		"vmla.f32 d1, %P[qb0], d0 \n"
		"vmla.f32 d1, %P[qb2], %P[x2] \n"
		"vmov.f32 %P[x2], %P[x1] \n"
		"vmov.f32 %P[x1], d0 \n"
		/* y -= qa1 * y1 + qa2 * y2: first stage output complete. */
		"vmls.f32 d1, %P[qa1], %P[y1] \n"
		"vmls.f32 d1, %P[qa2], %P[y2] \n"
		/* z += rb0 * y + rb2 * y2, then shift the y history. */
		"vmla.f32 d2, %P[rb0], d1 \n"
		"vmla.f32 d2, %P[rb2], %P[y2] \n"
		"vmov.f32 %P[y2], %P[y1] \n"
		"vmov.f32 %P[y1], d1 \n"
		/* z -= ra1 * z1 + ra2 * z2, then shift the z history. */
		"vmls.f32 d2, %P[ra1], %P[z1] \n"
		"vmls.f32 d2, %P[ra2], %P[z2] \n"
		"vmov.f32 %P[z2], %P[z1] \n"
		"vmov.f32 %P[z1], d2 \n"
		/* Store one output per channel and advance both pointers. */
		"vst1.f32 d2[0], [%[data0]]! \n"
		"vst1.f32 d2[1], [%[data1]]! \n"
		"bne 1b \n"
		: /* output */
		  [data0]"+r"(data0),
		  [data1]"+r"(data1),
		  [count]"+r"(count),
		  [x1]"+w"(x1),
		  [x2]"+w"(x2),
		  [y1]"+w"(y1),
		  [y2]"+w"(y2),
		  [z1]"+w"(z1),
		  [z2]"+w"(z2)
		: /* input */
		  [qb0]"w"(qb0),
		  [qb1]"w"(qb1),
		  [qb2]"w"(qb2),
		  [qa1]"w"(qa1),
		  [qa2]"w"(qa2),
		  [rb0]"w"(rb0),
		  [rb1]"w"(rb1),
		  [rb2]"w"(rb2),
		  [ra1]"w"(ra1),
		  [ra2]"w"(ra2)
		: /* clobber */
		  "d0", "d1", "d2", "memory", "cc");
	// clang-format on

	/* Write the updated histories back: lane 0 to the data0-channel
	 * biquads, lane 1 to the data1-channel biquads. */
	qL->x1 = x1[0];
	qL->x2 = x2[0];
	qL->y1 = y1[0];
	qL->y2 = y2[0];
	rL->y1 = z1[0];
	rL->y2 = z2[0];
	qR->x1 = x1[1];
	qR->x2 = x2[1];
	qR->y1 = y1[1];
	qR->y2 = y2[1];
	rR->y1 = z1[1];
	rR->y2 = z2[1];
}
203 #endif
204
205 #if defined(__SSE3__) && defined(__x86_64__)
206 #include <emmintrin.h>
/* Runs two consecutive biquad stages over both channels, in place, using
 * SSE inline assembly.
 *
 * Args:
 *    bq - bq[0] is the first stage ("q") and bq[1] the second stage ("r");
 *         index [..][0] filters data0 and [..][1] filters data1.
 *    data0, data1 - Sample buffers for the two channels, overwritten with
 *                   the output of the second stage.
 *    count - Number of samples per buffer. The loop decrements count before
 *            testing it, so the body always runs at least once; callers
 *            must pass a positive count.
 *
 * Only lanes 0 and 1 of each vector carry data: lane 0 is the data0
 * channel and lane 1 the data1 channel. The output of the first stage is
 * the input of the second, so y1/y2 serve both as q's output history and as
 * r's input history; r's own x1/x2 fields are neither read nor written here.
 */
static inline void eq2_process_two_sse3(struct biquad (*bq)[2], float *data0,
					float *data1, int count)
{
	struct biquad *qL = &bq[0][0];
	struct biquad *rL = &bq[1][0];
	struct biquad *qR = &bq[0][1];
	struct biquad *rR = &bq[1][1];

	/* First stage: input history, output history and coefficients. */
	__m128 x1 = { qL->x1, qR->x1 };
	__m128 x2 = { qL->x2, qR->x2 };
	__m128 y1 = { qL->y1, qR->y1 };
	__m128 y2 = { qL->y2, qR->y2 };
	__m128 qb0 = { qL->b0, qR->b0 };
	__m128 qb1 = { qL->b1, qR->b1 };
	__m128 qb2 = { qL->b2, qR->b2 };
	__m128 qa1 = { qL->a1, qR->a1 };
	__m128 qa2 = { qL->a2, qR->a2 };

	/* Second stage: output history and coefficients. */
	__m128 z1 = { rL->y1, rR->y1 };
	__m128 z2 = { rL->y2, rR->y2 };
	__m128 rb0 = { rL->b0, rR->b0 };
	__m128 rb1 = { rL->b1, rR->b1 };
	__m128 rb2 = { rL->b2, rR->b2 };
	__m128 ra1 = { rL->a1, rR->a1 };
	__m128 ra2 = { rL->a2, rR->a2 };

	/* Within one iteration the x2, y2 and z2 registers are reused as
	 * scratch for partial products once their old value has been
	 * multiplied in; each is reloaded with the shifted history before the
	 * iteration ends. xmm0 accumulates y (first stage output), xmm1
	 * accumulates z (second stage output), xmm2 holds the input pair x. */
	// clang-format off
	__asm__ __volatile__(
		"1: \n"
		/* xmm2 = { data0 sample, data1 sample, ... } */
		"movss (%[data0]), %%xmm2 \n"
		"movss (%[data1]), %%xmm1 \n"
		"unpcklps %%xmm1, %%xmm2 \n"
		/* x2 = qb2 * x2; z2 = ra2 * z2 (both consumed later). */
		"mulps %[qb2],%[x2] \n"
		"lddqu %[qb0],%%xmm0 \n"
		"mulps %[ra2],%[z2] \n"
		"lddqu %[qb1],%%xmm1 \n"
		/* xmm0 = qb0 * x + qb1 * x1 */
		"mulps %%xmm2,%%xmm0 \n"
		"mulps %[x1],%%xmm1 \n"
		"addps %%xmm1,%%xmm0 \n"
		/* xmm1 = qa1 * y1; xmm0 += qb2 * x2 */
		"movaps %[qa1],%%xmm1 \n"
		"mulps %[y1],%%xmm1 \n"
		"addps %[x2],%%xmm0 \n"
		/* x2 (now scratch) = rb1 * y1 */
		"movaps %[rb1],%[x2] \n"
		"mulps %[y1],%[x2] \n"
		/* xmm0 -= qa1 * y1 */
		"subps %%xmm1,%%xmm0 \n"
		/* xmm1 = qa2 * y2; y2 (now scratch) = rb2 * y2 */
		"movaps %[qa2],%%xmm1 \n"
		"mulps %[y2],%%xmm1 \n"
		"mulps %[rb2],%[y2] \n"
		/* xmm0 -= qa2 * y2: first stage output y complete. */
		"subps %%xmm1,%%xmm0 \n"
		/* xmm1 = rb0 * y + rb1 * y1 */
		"movaps %[rb0],%%xmm1 \n"
		"mulps %%xmm0,%%xmm1 \n"
		"addps %[x2],%%xmm1 \n"
		/* Shift the input history: x2 = x1, x1 = x. */
		"movaps %[x1],%[x2] \n"
		"movaps %%xmm2,%[x1] \n"
		/* xmm1 += rb2 * y2 */
		"addps %[y2],%%xmm1 \n"
		/* xmm1 -= ra1 * z1 (y2 used as scratch for the product). */
		"movaps %[ra1],%[y2] \n"
		"mulps %[z1],%[y2] \n"
		"subps %[y2],%%xmm1 \n"
		/* Shift the y history: y2 = y1, y1 = y. */
		"movaps %[y1],%[y2] \n"
		"movaps %%xmm0,%[y1] \n"
		/* xmm1 -= ra2 * z2: second stage output z complete. */
		"subps %[z2],%%xmm1 \n"
		/* Shift the z history: z2 = z1, z1 = z. */
		"movaps %[z1],%[z2] \n"
		"movaps %%xmm1,%[z1] \n"
		/* Store lane 0 to data0, then bring lane 1 down to lane 0
		 * and store it to data1. */
		"movss %%xmm1, (%[data0]) \n"
		"shufps $1, %%xmm1, %%xmm1 \n"
		"movss %%xmm1, (%[data1]) \n"
		/* Advance both pointers by one float and loop until count
		 * reaches zero. */
		"add $4, %[data0] \n"
		"add $4, %[data1] \n"
		"sub $1, %[count] \n"
		"jnz 1b \n"
		: /* output */
		  [data0]"+r"(data0),
		  [data1]"+r"(data1),
		  [count]"+r"(count),
		  [x1]"+x"(x1),
		  [x2]"+x"(x2),
		  [y1]"+x"(y1),
		  [y2]"+x"(y2),
		  [z1]"+x"(z1),
		  [z2]"+x"(z2)
		: /* input */
		  [qb0]"m"(qb0),
		  [qb1]"m"(qb1),
		  [qb2]"m"(qb2),
		  [qa1]"x"(qa1),
		  [qa2]"x"(qa2),
		  [rb0]"x"(rb0),
		  [rb1]"x"(rb1),
		  [rb2]"x"(rb2),
		  [ra1]"x"(ra1),
		  [ra2]"x"(ra2)
		: /* clobber */
		  "xmm0", "xmm1", "xmm2", "memory", "cc");
	// clang-format on

	/* Write the updated histories back: lane 0 to the data0-channel
	 * biquads, lane 1 to the data1-channel biquads. */
	qL->x1 = x1[0];
	qL->x2 = x2[0];
	qL->y1 = y1[0];
	qL->y2 = y2[0];
	rL->y1 = z1[0];
	rL->y2 = z2[0];
	qR->x1 = x1[1];
	qR->x2 = x2[1];
	qR->y1 = y1[1];
	qR->y2 = y2[1];
	rR->y1 = z1[1];
	rR->y2 = z2[1];
}
315 #endif
316
eq2_process(struct eq2 * eq2,float * data0,float * data1,int count)317 void eq2_process(struct eq2 *eq2, float *data0, float *data1, int count)
318 {
319 int i;
320 int n;
321 if (!count)
322 return;
323 n = eq2->n[0];
324 if (eq2->n[1] > n)
325 n = eq2->n[1];
326 for (i = 0; i < n; i += 2) {
327 if (i + 1 == n) {
328 eq2_process_one(&eq2->biquad[i], data0, data1, count);
329 } else {
330 #if defined(__ARM_NEON__)
331 eq2_process_two_neon(&eq2->biquad[i], data0, data1,
332 count);
333 #elif defined(__SSE3__) && defined(__x86_64__)
334 eq2_process_two_sse3(&eq2->biquad[i], data0, data1,
335 count);
336 #else
337 eq2_process_one(&eq2->biquad[i], data0, data1, count);
338 eq2_process_one(&eq2->biquad[i + 1], data0, data1,
339 count);
340 #endif
341 }
342 }
343 }
344