1 /* Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2 * Use of this source code is governed by a BSD-style license that can be
3 * found in the LICENSE file.
4 */
5
6 #include <string.h>
7 #include "crossover2.h"
8 #include "biquad.h"
9
lr42_set(struct lr42 * lr42,enum biquad_type type,float freq)10 static void lr42_set(struct lr42 *lr42, enum biquad_type type, float freq)
11 {
12 struct biquad q;
13 biquad_set(&q, type, freq, 0, 0);
14 memset(lr42, 0, sizeof(*lr42));
15 lr42->b0 = q.b0;
16 lr42->b1 = q.b1;
17 lr42->b2 = q.b2;
18 lr42->a1 = q.a1;
19 lr42->a2 = q.a2;
20 }
21
/* Split the input with a pair of LR4 filters. The low-pass result overwrites
 * the input array and the high-pass result is written to a second array.
 *
 *   data0 --+-- lp --> data0
 *           |
 *           \-- hp --> data1
 */
29 #if defined(__ARM_NEON__)
30 #include <arm_neon.h>
lr42_split(struct lr42 * lp,struct lr42 * hp,int count,float * data0L,float * data0R,float * data1L,float * data1R)31 static void lr42_split(struct lr42 *lp, struct lr42 *hp, int count,
32 float *data0L, float *data0R, float *data1L,
33 float *data1R)
34 {
35 float32x4_t x1 = { lp->x1L, hp->x1L, lp->x1R, hp->x1R };
36 float32x4_t x2 = { lp->x2L, hp->x2L, lp->x2R, hp->x2R };
37 float32x4_t y1 = { lp->y1L, hp->y1L, lp->y1R, hp->y1R };
38 float32x4_t y2 = { lp->y2L, hp->y2L, lp->y2R, hp->y2R };
39 float32x4_t z1 = { lp->z1L, hp->z1L, lp->z1R, hp->z1R };
40 float32x4_t z2 = { lp->z2L, hp->z2L, lp->z2R, hp->z2R };
41 float32x4_t b0 = { lp->b0, hp->b0, lp->b0, hp->b0 };
42 float32x4_t b1 = { lp->b1, hp->b1, lp->b1, hp->b1 };
43 float32x4_t b2 = { lp->b2, hp->b2, lp->b2, hp->b2 };
44 float32x4_t a1 = { lp->a1, hp->a1, lp->a1, hp->a1 };
45 float32x4_t a2 = { lp->a2, hp->a2, lp->a2, hp->a2 };
46
47 // clang-format off
48 __asm__ __volatile__(
49 /* q0 = x, q1 = y, q2 = z */
50 "1: \n"
51 "vmul.f32 q1, %q[b1], %q[x1] \n"
52 "vld1.32 d0[], [%[data0L]] \n"
53 "vld1.32 d1[], [%[data0R]] \n"
54 "subs %[count], #1 \n"
55 "vmul.f32 q2, %q[b1], %q[y1] \n"
56 "vmla.f32 q1, %q[b0], q0 \n"
57 "vmla.f32 q1, %q[b2], %q[x2] \n"
58 "vmov.f32 %q[x2], %q[x1] \n"
59 "vmov.f32 %q[x1], q0 \n"
60 "vmls.f32 q1, %q[a1], %q[y1] \n"
61 "vmls.f32 q1, %q[a2], %q[y2] \n"
62 "vmla.f32 q2, %q[b0], q1 \n"
63 "vmla.f32 q2, %q[b2], %q[y2] \n"
64 "vmov.f32 %q[y2], %q[y1] \n"
65 "vmov.f32 %q[y1], q1 \n"
66 "vmls.f32 q2, %q[a1], %q[z1] \n"
67 "vmls.f32 q2, %q[a2], %q[z2] \n"
68 "vmov.f32 %q[z2], %q[z1] \n"
69 "vmov.f32 %q[z1], q2 \n"
70 "vst1.f32 d4[0], [%[data0L]]! \n"
71 "vst1.f32 d4[1], [%[data1L]]! \n"
72 "vst1.f32 d5[0], [%[data0R]]! \n"
73 "vst1.f32 d5[1], [%[data1R]]! \n"
74 "bne 1b \n"
75 : /* output */
76 "=r"(data0L),
77 "=r"(data0R),
78 "=r"(data1L),
79 "=r"(data1R),
80 "=r"(count),
81 [x1]"+w"(x1),
82 [x2]"+w"(x2),
83 [y1]"+w"(y1),
84 [y2]"+w"(y2),
85 [z1]"+w"(z1),
86 [z2]"+w"(z2)
87 : /* input */
88 [data0L]"0"(data0L),
89 [data0R]"1"(data0R),
90 [data1L]"2"(data1L),
91 [data1R]"3"(data1R),
92 [count]"4"(count),
93 [b0]"w"(b0),
94 [b1]"w"(b1),
95 [b2]"w"(b2),
96 [a1]"w"(a1),
97 [a2]"w"(a2)
98 : /* clobber */
99 "q0", "q1", "q2", "memory", "cc");
100 // clang-format on
101
102 lp->x1L = x1[0];
103 lp->x1R = x1[2];
104 lp->x2L = x2[0];
105 lp->x2R = x2[2];
106 lp->y1L = y1[0];
107 lp->y1R = y1[2];
108 lp->y2L = y2[0];
109 lp->y2R = y2[2];
110 lp->z1L = z1[0];
111 lp->z1R = z1[2];
112 lp->z2L = z2[0];
113 lp->z2R = z2[2];
114
115 hp->x1L = x1[1];
116 hp->x1R = x1[3];
117 hp->x2L = x2[1];
118 hp->x2R = x2[3];
119 hp->y1L = y1[1];
120 hp->y1R = y1[3];
121 hp->y2L = y2[1];
122 hp->y2R = y2[3];
123 hp->z1L = z1[1];
124 hp->z1R = z1[3];
125 hp->z2L = z2[1];
126 hp->z2R = z2[3];
127 }
128 #elif defined(__SSE3__) && defined(__x86_64__)
129 #include <emmintrin.h>
lr42_split(struct lr42 * lp,struct lr42 * hp,int count,float * data0L,float * data0R,float * data1L,float * data1R)130 static void lr42_split(struct lr42 *lp, struct lr42 *hp, int count,
131 float *data0L, float *data0R, float *data1L,
132 float *data1R)
133 {
134 __m128 x1 = { lp->x1L, hp->x1L, lp->x1R, hp->x1R };
135 __m128 x2 = { lp->x2L, hp->x2L, lp->x2R, hp->x2R };
136 __m128 y1 = { lp->y1L, hp->y1L, lp->y1R, hp->y1R };
137 __m128 y2 = { lp->y2L, hp->y2L, lp->y2R, hp->y2R };
138 __m128 z1 = { lp->z1L, hp->z1L, lp->z1R, hp->z1R };
139 __m128 z2 = { lp->z2L, hp->z2L, lp->z2R, hp->z2R };
140 __m128 b0 = { lp->b0, hp->b0, lp->b0, hp->b0 };
141 __m128 b1 = { lp->b1, hp->b1, lp->b1, hp->b1 };
142 __m128 b2 = { lp->b2, hp->b2, lp->b2, hp->b2 };
143 __m128 a1 = { lp->a1, hp->a1, lp->a1, hp->a1 };
144 __m128 a2 = { lp->a2, hp->a2, lp->a2, hp->a2 };
145
146 // clang-format off
147 __asm__ __volatile__(
148 "1: \n"
149 "movss (%[data0L]), %%xmm2 \n"
150 "movss (%[data0R]), %%xmm1 \n"
151 "shufps $0, %%xmm1, %%xmm2 \n"
152 "mulps %[b2],%[x2] \n"
153 "movaps %[b0], %%xmm0 \n"
154 "mulps %[a2],%[z2] \n"
155 "movaps %[b1], %%xmm1 \n"
156 "mulps %%xmm2,%%xmm0 \n"
157 "mulps %[x1],%%xmm1 \n"
158 "addps %%xmm1,%%xmm0 \n"
159 "movaps %[a1],%%xmm1 \n"
160 "mulps %[y1],%%xmm1 \n"
161 "addps %[x2],%%xmm0 \n"
162 "movaps %[b1],%[x2] \n"
163 "mulps %[y1],%[x2] \n"
164 "subps %%xmm1,%%xmm0 \n"
165 "movaps %[a2],%%xmm1 \n"
166 "mulps %[y2],%%xmm1 \n"
167 "mulps %[b2],%[y2] \n"
168 "subps %%xmm1,%%xmm0 \n"
169 "movaps %[b0],%%xmm1 \n"
170 "mulps %%xmm0,%%xmm1 \n"
171 "addps %[x2],%%xmm1 \n"
172 "movaps %[x1],%[x2] \n"
173 "movaps %%xmm2,%[x1] \n"
174 "addps %[y2],%%xmm1 \n"
175 "movaps %[a1],%[y2] \n"
176 "mulps %[z1],%[y2] \n"
177 "subps %[y2],%%xmm1 \n"
178 "movaps %[y1],%[y2] \n"
179 "movaps %%xmm0,%[y1] \n"
180 "subps %[z2],%%xmm1 \n"
181 "movaps %[z1],%[z2] \n"
182 "movaps %%xmm1,%[z1] \n"
183 "movss %%xmm1, (%[data0L]) \n"
184 "shufps $0x39, %%xmm1, %%xmm1 \n"
185 "movss %%xmm1, (%[data1L]) \n"
186 "shufps $0x39, %%xmm1, %%xmm1 \n"
187 "movss %%xmm1, (%[data0R]) \n"
188 "shufps $0x39, %%xmm1, %%xmm1 \n"
189 "movss %%xmm1, (%[data1R]) \n"
190 "add $4, %[data0L] \n"
191 "add $4, %[data1L] \n"
192 "add $4, %[data0R] \n"
193 "add $4, %[data1R] \n"
194 "sub $1, %[count] \n"
195 "jnz 1b \n"
196 : /* output */
197 [data0L]"+r"(data0L),
198 [data0R]"+r"(data0R),
199 [data1L]"+r"(data1L),
200 [data1R]"+r"(data1R),
201 [count]"+r"(count),
202 [x1]"+x"(x1),
203 [x2]"+x"(x2),
204 [y1]"+x"(y1),
205 [y2]"+x"(y2),
206 [z1]"+x"(z1),
207 [z2]"+x"(z2)
208 : /* input */
209 [b0]"x"(b0),
210 [b1]"x"(b1),
211 [b2]"x"(b2),
212 [a1]"x"(a1),
213 [a2]"x"(a2)
214 : /* clobber */
215 "xmm0", "xmm1", "xmm2", "memory", "cc");
216 // clang-format on
217
218 lp->x1L = x1[0];
219 lp->x1R = x1[2];
220 lp->x2L = x2[0];
221 lp->x2R = x2[2];
222 lp->y1L = y1[0];
223 lp->y1R = y1[2];
224 lp->y2L = y2[0];
225 lp->y2R = y2[2];
226 lp->z1L = z1[0];
227 lp->z1R = z1[2];
228 lp->z2L = z2[0];
229 lp->z2R = z2[2];
230
231 hp->x1L = x1[1];
232 hp->x1R = x1[3];
233 hp->x2L = x2[1];
234 hp->x2R = x2[3];
235 hp->y1L = y1[1];
236 hp->y1R = y1[3];
237 hp->y2L = y2[1];
238 hp->y2R = y2[3];
239 hp->z1L = z1[1];
240 hp->z1R = z1[3];
241 hp->z2L = z2[1];
242 hp->z2R = z2[3];
243 }
244 #else
lr42_split(struct lr42 * lp,struct lr42 * hp,int count,float * data0L,float * data0R,float * data1L,float * data1R)245 static void lr42_split(struct lr42 *lp, struct lr42 *hp, int count,
246 float *data0L, float *data0R, float *data1L,
247 float *data1R)
248 {
249 float lx1L = lp->x1L, lx1R = lp->x1R;
250 float lx2L = lp->x2L, lx2R = lp->x2R;
251 float ly1L = lp->y1L, ly1R = lp->y1R;
252 float ly2L = lp->y2L, ly2R = lp->y2R;
253 float lz1L = lp->z1L, lz1R = lp->z1R;
254 float lz2L = lp->z2L, lz2R = lp->z2R;
255 float lb0 = lp->b0;
256 float lb1 = lp->b1;
257 float lb2 = lp->b2;
258 float la1 = lp->a1;
259 float la2 = lp->a2;
260
261 float hx1L = hp->x1L, hx1R = hp->x1R;
262 float hx2L = hp->x2L, hx2R = hp->x2R;
263 float hy1L = hp->y1L, hy1R = hp->y1R;
264 float hy2L = hp->y2L, hy2R = hp->y2R;
265 float hz1L = hp->z1L, hz1R = hp->z1R;
266 float hz2L = hp->z2L, hz2R = hp->z2R;
267 float hb0 = hp->b0;
268 float hb1 = hp->b1;
269 float hb2 = hp->b2;
270 float ha1 = hp->a1;
271 float ha2 = hp->a2;
272
273 int i;
274 for (i = 0; i < count; i++) {
275 float xL, yL, zL, xR, yR, zR;
276 xL = data0L[i];
277 xR = data0R[i];
278 yL = lb0 * xL + lb1 * lx1L + lb2 * lx2L - la1 * ly1L -
279 la2 * ly2L;
280 yR = lb0 * xR + lb1 * lx1R + lb2 * lx2R - la1 * ly1R -
281 la2 * ly2R;
282 zL = lb0 * yL + lb1 * ly1L + lb2 * ly2L - la1 * lz1L -
283 la2 * lz2L;
284 zR = lb0 * yR + lb1 * ly1R + lb2 * ly2R - la1 * lz1R -
285 la2 * lz2R;
286 lx2L = lx1L;
287 lx2R = lx1R;
288 lx1L = xL;
289 lx1R = xR;
290 ly2L = ly1L;
291 ly2R = ly1R;
292 ly1L = yL;
293 ly1R = yR;
294 lz2L = lz1L;
295 lz2R = lz1R;
296 lz1L = zL;
297 lz1R = zR;
298 data0L[i] = zL;
299 data0R[i] = zR;
300
301 yL = hb0 * xL + hb1 * hx1L + hb2 * hx2L - ha1 * hy1L -
302 ha2 * hy2L;
303 yR = hb0 * xR + hb1 * hx1R + hb2 * hx2R - ha1 * hy1R -
304 ha2 * hy2R;
305 zL = hb0 * yL + hb1 * hy1L + hb2 * hy2L - ha1 * hz1L -
306 ha2 * hz2L;
307 zR = hb0 * yR + hb1 * hy1R + hb2 * hy2R - ha1 * hz1R -
308 ha2 * hz2R;
309 hx2L = hx1L;
310 hx2R = hx1R;
311 hx1L = xL;
312 hx1R = xR;
313 hy2L = hy1L;
314 hy2R = hy1R;
315 hy1L = yL;
316 hy1R = yR;
317 hz2L = hz1L;
318 hz2R = hz1R;
319 hz1L = zL;
320 hz1R = zR;
321 data1L[i] = zL;
322 data1R[i] = zR;
323 }
324
325 lp->x1L = lx1L;
326 lp->x1R = lx1R;
327 lp->x2L = lx2L;
328 lp->x2R = lx2R;
329 lp->y1L = ly1L;
330 lp->y1R = ly1R;
331 lp->y2L = ly2L;
332 lp->y2R = ly2R;
333 lp->z1L = lz1L;
334 lp->z1R = lz1R;
335 lp->z2L = lz2L;
336 lp->z2R = lz2R;
337
338 hp->x1L = hx1L;
339 hp->x1R = hx1R;
340 hp->x2L = hx2L;
341 hp->x2R = hx2R;
342 hp->y1L = hy1L;
343 hp->y1R = hy1R;
344 hp->y2L = hy2L;
345 hp->y2R = hy2R;
346 hp->z1L = hz1L;
347 hp->z1R = hz1R;
348 hp->z2L = hz2L;
349 hp->z2R = hz2R;
350 }
351 #endif
352
/* Run the input through a pair of LR4 filters and write the sum of the
 * low-pass and high-pass results back to the original data array.
 *
 *   data --+-- lp --+--> data
 *          |        |
 *          \-- hp --/
 */
360 #if defined(__ARM_NEON__)
361 #include <arm_neon.h>
lr42_merge(struct lr42 * lp,struct lr42 * hp,int count,float * dataL,float * dataR)362 static void lr42_merge(struct lr42 *lp, struct lr42 *hp, int count,
363 float *dataL, float *dataR)
364 {
365 float32x4_t x1 = { lp->x1L, hp->x1L, lp->x1R, hp->x1R };
366 float32x4_t x2 = { lp->x2L, hp->x2L, lp->x2R, hp->x2R };
367 float32x4_t y1 = { lp->y1L, hp->y1L, lp->y1R, hp->y1R };
368 float32x4_t y2 = { lp->y2L, hp->y2L, lp->y2R, hp->y2R };
369 float32x4_t z1 = { lp->z1L, hp->z1L, lp->z1R, hp->z1R };
370 float32x4_t z2 = { lp->z2L, hp->z2L, lp->z2R, hp->z2R };
371 float32x4_t b0 = { lp->b0, hp->b0, lp->b0, hp->b0 };
372 float32x4_t b1 = { lp->b1, hp->b1, lp->b1, hp->b1 };
373 float32x4_t b2 = { lp->b2, hp->b2, lp->b2, hp->b2 };
374 float32x4_t a1 = { lp->a1, hp->a1, lp->a1, hp->a1 };
375 float32x4_t a2 = { lp->a2, hp->a2, lp->a2, hp->a2 };
376
377 // clang-format off
378 __asm__ __volatile__(
379 /* q0 = x, q1 = y, q2 = z */
380 "1: \n"
381 "vmul.f32 q1, %q[b1], %q[x1] \n"
382 "vld1.32 d0[], [%[dataL]] \n"
383 "vld1.32 d1[], [%[dataR]] \n"
384 "subs %[count], #1 \n"
385 "vmul.f32 q2, %q[b1], %q[y1] \n"
386 "vmla.f32 q1, %q[b0], q0 \n"
387 "vmla.f32 q1, %q[b2], %q[x2] \n"
388 "vmov.f32 %q[x2], %q[x1] \n"
389 "vmov.f32 %q[x1], q0 \n"
390 "vmls.f32 q1, %q[a1], %q[y1] \n"
391 "vmls.f32 q1, %q[a2], %q[y2] \n"
392 "vmla.f32 q2, %q[b0], q1 \n"
393 "vmla.f32 q2, %q[b2], %q[y2] \n"
394 "vmov.f32 %q[y2], %q[y1] \n"
395 "vmov.f32 %q[y1], q1 \n"
396 "vmls.f32 q2, %q[a1], %q[z1] \n"
397 "vmls.f32 q2, %q[a2], %q[z2] \n"
398 "vmov.f32 %q[z2], %q[z1] \n"
399 "vmov.f32 %q[z1], q2 \n"
400 "vpadd.f32 d4, d4, d5 \n"
401 "vst1.f32 d4[0], [%[dataL]]! \n"
402 "vst1.f32 d4[1], [%[dataR]]! \n"
403 "bne 1b \n"
404 : /* output */
405 "=r"(dataL),
406 "=r"(dataR),
407 "=r"(count),
408 [x1]"+w"(x1),
409 [x2]"+w"(x2),
410 [y1]"+w"(y1),
411 [y2]"+w"(y2),
412 [z1]"+w"(z1),
413 [z2]"+w"(z2)
414 : /* input */
415 [dataL]"0"(dataL),
416 [dataR]"1"(dataR),
417 [count]"2"(count),
418 [b0]"w"(b0),
419 [b1]"w"(b1),
420 [b2]"w"(b2),
421 [a1]"w"(a1),
422 [a2]"w"(a2)
423 : /* clobber */
424 "q0", "q1", "q2", "memory", "cc");
425 // clang-format on
426
427 lp->x1L = x1[0];
428 lp->x1R = x1[2];
429 lp->x2L = x2[0];
430 lp->x2R = x2[2];
431 lp->y1L = y1[0];
432 lp->y1R = y1[2];
433 lp->y2L = y2[0];
434 lp->y2R = y2[2];
435 lp->z1L = z1[0];
436 lp->z1R = z1[2];
437 lp->z2L = z2[0];
438 lp->z2R = z2[2];
439
440 hp->x1L = x1[1];
441 hp->x1R = x1[3];
442 hp->x2L = x2[1];
443 hp->x2R = x2[3];
444 hp->y1L = y1[1];
445 hp->y1R = y1[3];
446 hp->y2L = y2[1];
447 hp->y2R = y2[3];
448 hp->z1L = z1[1];
449 hp->z1R = z1[3];
450 hp->z2L = z2[1];
451 hp->z2R = z2[3];
452 }
453 #elif defined(__SSE3__) && defined(__x86_64__)
454 #include <emmintrin.h>
lr42_merge(struct lr42 * lp,struct lr42 * hp,int count,float * dataL,float * dataR)455 static void lr42_merge(struct lr42 *lp, struct lr42 *hp, int count,
456 float *dataL, float *dataR)
457 {
458 __m128 x1 = { lp->x1L, hp->x1L, lp->x1R, hp->x1R };
459 __m128 x2 = { lp->x2L, hp->x2L, lp->x2R, hp->x2R };
460 __m128 y1 = { lp->y1L, hp->y1L, lp->y1R, hp->y1R };
461 __m128 y2 = { lp->y2L, hp->y2L, lp->y2R, hp->y2R };
462 __m128 z1 = { lp->z1L, hp->z1L, lp->z1R, hp->z1R };
463 __m128 z2 = { lp->z2L, hp->z2L, lp->z2R, hp->z2R };
464 __m128 b0 = { lp->b0, hp->b0, lp->b0, hp->b0 };
465 __m128 b1 = { lp->b1, hp->b1, lp->b1, hp->b1 };
466 __m128 b2 = { lp->b2, hp->b2, lp->b2, hp->b2 };
467 __m128 a1 = { lp->a1, hp->a1, lp->a1, hp->a1 };
468 __m128 a2 = { lp->a2, hp->a2, lp->a2, hp->a2 };
469
470 // clang-format off
471 __asm__ __volatile__(
472 "1: \n"
473 "movss (%[dataL]), %%xmm2 \n"
474 "movss (%[dataR]), %%xmm1 \n"
475 "shufps $0, %%xmm1, %%xmm2 \n"
476 "mulps %[b2],%[x2] \n"
477 "movaps %[b0], %%xmm0 \n"
478 "mulps %[a2],%[z2] \n"
479 "movaps %[b1], %%xmm1 \n"
480 "mulps %%xmm2,%%xmm0 \n"
481 "mulps %[x1],%%xmm1 \n"
482 "addps %%xmm1,%%xmm0 \n"
483 "movaps %[a1],%%xmm1 \n"
484 "mulps %[y1],%%xmm1 \n"
485 "addps %[x2],%%xmm0 \n"
486 "movaps %[b1],%[x2] \n"
487 "mulps %[y1],%[x2] \n"
488 "subps %%xmm1,%%xmm0 \n"
489 "movaps %[a2],%%xmm1 \n"
490 "mulps %[y2],%%xmm1 \n"
491 "mulps %[b2],%[y2] \n"
492 "subps %%xmm1,%%xmm0 \n"
493 "movaps %[b0],%%xmm1 \n"
494 "mulps %%xmm0,%%xmm1 \n"
495 "addps %[x2],%%xmm1 \n"
496 "movaps %[x1],%[x2] \n"
497 "movaps %%xmm2,%[x1] \n"
498 "addps %[y2],%%xmm1 \n"
499 "movaps %[a1],%[y2] \n"
500 "mulps %[z1],%[y2] \n"
501 "subps %[y2],%%xmm1 \n"
502 "movaps %[y1],%[y2] \n"
503 "movaps %%xmm0,%[y1] \n"
504 "subps %[z2],%%xmm1 \n"
505 "movaps %[z1],%[z2] \n"
506 "movaps %%xmm1,%[z1] \n"
507 "haddps %%xmm1, %%xmm1 \n"
508 "movss %%xmm1, (%[dataL]) \n"
509 "shufps $0x39, %%xmm1, %%xmm1 \n"
510 "movss %%xmm1, (%[dataR]) \n"
511 "add $4, %[dataL] \n"
512 "add $4, %[dataR] \n"
513 "sub $1, %[count] \n"
514 "jnz 1b \n"
515 : /* output */
516 [dataL]"+r"(dataL),
517 [dataR]"+r"(dataR),
518 [count]"+r"(count),
519 [x1]"+x"(x1),
520 [x2]"+x"(x2),
521 [y1]"+x"(y1),
522 [y2]"+x"(y2),
523 [z1]"+x"(z1),
524 [z2]"+x"(z2)
525 : /* input */
526 [b0]"x"(b0),
527 [b1]"x"(b1),
528 [b2]"x"(b2),
529 [a1]"x"(a1),
530 [a2]"x"(a2)
531 : /* clobber */
532 "xmm0", "xmm1", "xmm2", "memory", "cc");
533 // clang-format on
534
535 lp->x1L = x1[0];
536 lp->x1R = x1[2];
537 lp->x2L = x2[0];
538 lp->x2R = x2[2];
539 lp->y1L = y1[0];
540 lp->y1R = y1[2];
541 lp->y2L = y2[0];
542 lp->y2R = y2[2];
543 lp->z1L = z1[0];
544 lp->z1R = z1[2];
545 lp->z2L = z2[0];
546 lp->z2R = z2[2];
547
548 hp->x1L = x1[1];
549 hp->x1R = x1[3];
550 hp->x2L = x2[1];
551 hp->x2R = x2[3];
552 hp->y1L = y1[1];
553 hp->y1R = y1[3];
554 hp->y2L = y2[1];
555 hp->y2R = y2[3];
556 hp->z1L = z1[1];
557 hp->z1R = z1[3];
558 hp->z2L = z2[1];
559 hp->z2R = z2[3];
560 }
561 #else
lr42_merge(struct lr42 * lp,struct lr42 * hp,int count,float * dataL,float * dataR)562 static void lr42_merge(struct lr42 *lp, struct lr42 *hp, int count,
563 float *dataL, float *dataR)
564 {
565 float lx1L = lp->x1L, lx1R = lp->x1R;
566 float lx2L = lp->x2L, lx2R = lp->x2R;
567 float ly1L = lp->y1L, ly1R = lp->y1R;
568 float ly2L = lp->y2L, ly2R = lp->y2R;
569 float lz1L = lp->z1L, lz1R = lp->z1R;
570 float lz2L = lp->z2L, lz2R = lp->z2R;
571 float lb0 = lp->b0;
572 float lb1 = lp->b1;
573 float lb2 = lp->b2;
574 float la1 = lp->a1;
575 float la2 = lp->a2;
576
577 float hx1L = hp->x1L, hx1R = hp->x1R;
578 float hx2L = hp->x2L, hx2R = hp->x2R;
579 float hy1L = hp->y1L, hy1R = hp->y1R;
580 float hy2L = hp->y2L, hy2R = hp->y2R;
581 float hz1L = hp->z1L, hz1R = hp->z1R;
582 float hz2L = hp->z2L, hz2R = hp->z2R;
583 float hb0 = hp->b0;
584 float hb1 = hp->b1;
585 float hb2 = hp->b2;
586 float ha1 = hp->a1;
587 float ha2 = hp->a2;
588
589 int i;
590 for (i = 0; i < count; i++) {
591 float xL, yL, zL, xR, yR, zR;
592 xL = dataL[i];
593 xR = dataR[i];
594 yL = lb0 * xL + lb1 * lx1L + lb2 * lx2L - la1 * ly1L -
595 la2 * ly2L;
596 yR = lb0 * xR + lb1 * lx1R + lb2 * lx2R - la1 * ly1R -
597 la2 * ly2R;
598 zL = lb0 * yL + lb1 * ly1L + lb2 * ly2L - la1 * lz1L -
599 la2 * lz2L;
600 zR = lb0 * yR + lb1 * ly1R + lb2 * ly2R - la1 * lz1R -
601 la2 * lz2R;
602 lx2L = lx1L;
603 lx2R = lx1R;
604 lx1L = xL;
605 lx1R = xR;
606 ly2L = ly1L;
607 ly2R = ly1R;
608 ly1L = yL;
609 ly1R = yR;
610 lz2L = lz1L;
611 lz2R = lz1R;
612 lz1L = zL;
613 lz1R = zR;
614
615 yL = hb0 * xL + hb1 * hx1L + hb2 * hx2L - ha1 * hy1L -
616 ha2 * hy2L;
617 yR = hb0 * xR + hb1 * hx1R + hb2 * hx2R - ha1 * hy1R -
618 ha2 * hy2R;
619 zL = hb0 * yL + hb1 * hy1L + hb2 * hy2L - ha1 * hz1L -
620 ha2 * hz2L;
621 zR = hb0 * yR + hb1 * hy1R + hb2 * hy2R - ha1 * hz1R -
622 ha2 * hz2R;
623 hx2L = hx1L;
624 hx2R = hx1R;
625 hx1L = xL;
626 hx1R = xR;
627 hy2L = hy1L;
628 hy2R = hy1R;
629 hy1L = yL;
630 hy1R = yR;
631 hz2L = hz1L;
632 hz2R = hz1R;
633 hz1L = zL;
634 hz1R = zR;
635 dataL[i] = zL + lz1L;
636 dataR[i] = zR + lz1R;
637 }
638
639 lp->x1L = lx1L;
640 lp->x1R = lx1R;
641 lp->x2L = lx2L;
642 lp->x2R = lx2R;
643 lp->y1L = ly1L;
644 lp->y1R = ly1R;
645 lp->y2L = ly2L;
646 lp->y2R = ly2R;
647 lp->z1L = lz1L;
648 lp->z1R = lz1R;
649 lp->z2L = lz2L;
650 lp->z2R = lz2R;
651
652 hp->x1L = hx1L;
653 hp->x1R = hx1R;
654 hp->x2L = hx2L;
655 hp->x2R = hx2R;
656 hp->y1L = hy1L;
657 hp->y1R = hy1R;
658 hp->y2L = hy2L;
659 hp->y2R = hy2R;
660 hp->z1L = hz1L;
661 hp->z1R = hz1R;
662 hp->z2L = hz2L;
663 hp->z2R = hz2R;
664 }
665 #endif
666
crossover2_init(struct crossover2 * xo2,float freq1,float freq2)667 void crossover2_init(struct crossover2 *xo2, float freq1, float freq2)
668 {
669 int i;
670 for (i = 0; i < 3; i++) {
671 float f = (i == 0) ? freq1 : freq2;
672 lr42_set(&xo2->lp[i], BQ_LOWPASS, f);
673 lr42_set(&xo2->hp[i], BQ_HIGHPASS, f);
674 }
675 }
676
crossover2_process(struct crossover2 * xo2,int count,float * data0L,float * data0R,float * data1L,float * data1R,float * data2L,float * data2R)677 void crossover2_process(struct crossover2 *xo2, int count, float *data0L,
678 float *data0R, float *data1L, float *data1R,
679 float *data2L, float *data2R)
680 {
681 if (!count)
682 return;
683
684 lr42_split(&xo2->lp[0], &xo2->hp[0], count, data0L, data0R, data1L,
685 data1R);
686 lr42_merge(&xo2->lp[1], &xo2->hp[1], count, data0L, data0R);
687 lr42_split(&xo2->lp[2], &xo2->hp[2], count, data1L, data1R, data2L,
688 data2R);
689 }
690