• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2013 The Chromium OS Authors. All rights reserved.
2  * Use of this source code is governed by a BSD-style license that can be
3  * found in the LICENSE file.
4  */
5 
6 #include <limits.h>
7 #include <syslog.h>
8 
9 #include "dsp_util.h"
10 
#ifndef max
/* Yields the larger of a and b.  Each argument is evaluated exactly once,
 * so expressions with side effects are safe to pass.  Relies on the GNU
 * statement-expression and __typeof__ extensions.
 */
#define max(a, b)                                                              \
	({                                                                     \
		__typeof__(a) max_lhs_ = (a);                                  \
		__typeof__(b) max_rhs_ = (b);                                  \
		(max_lhs_ > max_rhs_) ? max_lhs_ : max_rhs_;                   \
	})
#endif
19 
#ifndef min
/* Yields the smaller of a and b.  Each argument is evaluated exactly once,
 * so expressions with side effects are safe to pass.  Relies on the GNU
 * statement-expression and __typeof__ extensions.
 */
#define min(a, b)                                                              \
	({                                                                     \
		__typeof__(a) min_lhs_ = (a);                                  \
		__typeof__(b) min_rhs_ = (b);                                  \
		(min_lhs_ < min_rhs_) ? min_lhs_ : min_rhs_;                   \
	})
#endif
28 
29 #undef deinterleave_stereo
30 #undef interleave_stereo
31 
32 /* Converts shorts in range of -32768 to 32767 to floats in range of
33  * -1.0f to 1.0f.
34  * scvtf instruction accepts fixed point ints, so sxtl is used to lengthen
35  * shorts to int with sign extension.
36  */
37 #ifdef __aarch64__
deinterleave_stereo(int16_t * input,float * output1,float * output2,int frames)38 static void deinterleave_stereo(int16_t *input, float *output1, float *output2,
39 				int frames)
40 {
41 	int chunk = frames >> 3;
42 	frames &= 7;
43 	/* Process 8 frames (16 samples) each loop. */
44 	/* L0 R0 L1 R1 L2 R2 L3 R3... -> L0 L1 L2 L3... R0 R1 R2 R3... */
45 	if (chunk) {
46 		// clang-format off
47 		__asm__ __volatile__(
48 			"1:                                         \n"
49 			"ld2  {v2.8h, v3.8h}, [%[input]], #32       \n"
50 			"subs %w[chunk], %w[chunk], #1              \n"
51 			"sxtl   v0.4s, v2.4h                        \n"
52 			"sxtl2  v1.4s, v2.8h                        \n"
53 			"sxtl   v2.4s, v3.4h                        \n"
54 			"sxtl2  v3.4s, v3.8h                        \n"
55 			"scvtf  v0.4s, v0.4s, #15                   \n"
56 			"scvtf  v1.4s, v1.4s, #15                   \n"
57 			"scvtf  v2.4s, v2.4s, #15                   \n"
58 			"scvtf  v3.4s, v3.4s, #15                   \n"
59 			"st1    {v0.4s, v1.4s}, [%[output1]], #32   \n"
60 			"st1    {v2.4s, v3.4s}, [%[output2]], #32   \n"
61 			"b.ne   1b                                  \n"
62 			: /* output */
63 			  [chunk]"+r"(chunk),
64 			  [input]"+r"(input),
65 			  [output1]"+r"(output1),
66 			  [output2]"+r"(output2)
67 			: /* input */
68 			: /* clobber */
69 			  "v0", "v1", "v2", "v3", "memory", "cc");
70 		// clang-format on
71 	}
72 
73 	/* The remaining samples. */
74 	while (frames--) {
75 		*output1++ = *input++ / 32768.0f;
76 		*output2++ = *input++ / 32768.0f;
77 	}
78 }
79 #define deinterleave_stereo deinterleave_stereo
80 
81 /* Converts floats in range of -1.0f to 1.0f to shorts in range of
82  * -32768 to 32767 with rounding to nearest, with ties (0.5) rounding away
83  * from zero.
84  * Rounding is achieved by using fcvtas instruction. (a = away)
85  * The float scaled to a range of -32768 to 32767 by adding 15 to the exponent.
86  * Add to exponent is equivalent to multiply for exponent range of 0 to 239,
87  * which is 2.59 * 10^33.  A signed saturating add (sqadd) limits exponents
88  * from 240 to 255 to clamp to 255.
89  * For very large values, beyond +/- 2 billion, fcvtas will clamp the result
90  * to the min or max value that fits an int.
91  * For other values, sqxtn clamps the output to -32768 to 32767 range.
92  */
interleave_stereo(float * input1,float * input2,int16_t * output,int frames)93 static void interleave_stereo(float *input1, float *input2, int16_t *output,
94 			      int frames)
95 {
96 	/* Process 4 frames (8 samples) each loop. */
97 	/* L0 L1 L2 L3, R0 R1 R2 R3 -> L0 R0 L1 R1, L2 R2 L3 R3 */
98 	int chunk = frames >> 2;
99 	frames &= 3;
100 
101 	if (chunk) {
102 		// clang-format off
103 		__asm__ __volatile__(
104 			"dup    v2.4s, %w[scale]                    \n"
105 			"1:                                         \n"
106 			"ld1    {v0.4s}, [%[input1]], #16           \n"
107 			"ld1    {v1.4s}, [%[input2]], #16           \n"
108 			"subs   %w[chunk], %w[chunk], #1            \n"
109 			"sqadd  v0.4s, v0.4s, v2.4s                 \n"
110 			"sqadd  v1.4s, v1.4s, v2.4s                 \n"
111 			"fcvtas v0.4s, v0.4s                        \n"
112 			"fcvtas v1.4s, v1.4s                        \n"
113 			"sqxtn  v0.4h, v0.4s                        \n"
114 			"sqxtn  v1.4h, v1.4s                        \n"
115 			"st2    {v0.4h, v1.4h}, [%[output]], #16    \n"
116 			"b.ne   1b                                  \n"
117 			: /* output */
118 			  [chunk]"+r"(chunk),
119 			  [input1]"+r"(input1),
120 			  [input2]"+r"(input2),
121 			  [output]"+r"(output)
122 			: /* input */
123 			  [scale]"r"(15 << 23)
124 			: /* clobber */
125 			  "v0", "v1", "v2", "memory", "cc");
126 		// clang-format on
127 	}
128 
129 	/* The remaining samples */
130 	while (frames--) {
131 		float f;
132 		f = *input1++ * 32768.0f;
133 		f += (f >= 0) ? 0.5f : -0.5f;
134 		*output++ = max(-32768, min(32767, (int)(f)));
135 		f = *input2++ * 32768.0f;
136 		f += (f >= 0) ? 0.5f : -0.5f;
137 		*output++ = max(-32768, min(32767, (int)(f)));
138 	}
139 }
140 #define interleave_stereo interleave_stereo
141 #endif
142 
143 #ifdef __ARM_NEON__
144 #include <arm_neon.h>
145 
deinterleave_stereo(int16_t * input,float * output1,float * output2,int frames)146 static void deinterleave_stereo(int16_t *input, float *output1, float *output2,
147 				int frames)
148 {
149 	/* Process 8 frames (16 samples) each loop. */
150 	/* L0 R0 L1 R1 L2 R2 L3 R3... -> L0 L1 L2 L3... R0 R1 R2 R3... */
151 	int chunk = frames >> 3;
152 	frames &= 7;
153 	if (chunk) {
154 		// clang-format off
155 		__asm__ __volatile__(
156 			"1:					    \n"
157 			"vld2.16 {d0-d3}, [%[input]]!		    \n"
158 			"subs %[chunk], #1			    \n"
159 			"vmovl.s16 q3, d3			    \n"
160 			"vmovl.s16 q2, d2			    \n"
161 			"vmovl.s16 q1, d1			    \n"
162 			"vmovl.s16 q0, d0			    \n"
163 			"vcvt.f32.s32 q3, q3, #15		    \n"
164 			"vcvt.f32.s32 q2, q2, #15		    \n"
165 			"vcvt.f32.s32 q1, q1, #15		    \n"
166 			"vcvt.f32.s32 q0, q0, #15		    \n"
167 			"vst1.32 {d4-d7}, [%[output2]]!		    \n"
168 			"vst1.32 {d0-d3}, [%[output1]]!		    \n"
169 			"bne 1b					    \n"
170 			: /* output */
171 			  [chunk]"+r"(chunk),
172 			  [input]"+r"(input),
173 			  [output1]"+r"(output1),
174 			  [output2]"+r"(output2)
175 			: /* input */
176 			: /* clobber */
177 			  "q0", "q1", "q2", "q3", "memory", "cc");
178 		// clang-format on
179 	}
180 
181 	/* The remaining samples. */
182 	while (frames--) {
183 		*output1++ = *input++ / 32768.0f;
184 		*output2++ = *input++ / 32768.0f;
185 	}
186 }
187 #define deinterleave_stereo deinterleave_stereo
188 
/* ARM NEON implementation: converts two float channels (nominal range -1.0f
 * to 1.0f) to interleaved shorts in the range -32768 to 32767, rounding to
 * nearest with ties (0.5) rounding away from zero.
 *
 * SIMD path:
 *  - Rounding is achieved by adding 0.5 or -0.5, pre-scaled for the fixed
 *    point precision, and then converting float to fixed point with vcvt,
 *    which truncates toward zero.
 *  - For very large values, beyond +/- 2 billion, vcvt clamps the result to
 *    the min or max value that fits an int.
 *  - For other values, vqmovn clamps the output to -32768 to 32767.
 */
static void interleave_stereo(float *input1, float *input2, int16_t *output,
			      int frames)
{
	/* Half an output step expressed in the input scale. */
	float32x4_t half_up = vdupq_n_f32(0.5f / 32768.0f);
	float32x4_t half_down = vdupq_n_f32(-0.5f / 32768.0f);
	int blocks = frames >> 2; /* full groups of 4 frames for the SIMD loop */
	int tail = frames & 3; /* leftover frames, converted one by one */
	int i;

	/* Each pass handles 4 frames (8 samples):
	 * L0 L1 L2 L3, R0 R1 R2 R3 -> L0 R0 L1 R1, L2 R2 L3 R3
	 */
	if (blocks != 0) {
		// clang-format off
		__asm__ __volatile__(
			"veor q0, q0, q0			    \n"
			"1:					    \n"
			"vld1.32 {d2-d3}, [%[input1]]!		    \n"
			"vld1.32 {d4-d5}, [%[input2]]!		    \n"
			"subs %[chunk], #1			    \n"
			/* Round to nearest: lanes greater than zero get the
			 * positive offset, all other lanes the negative one,
			 * and the conversion then truncates.
			 */
			"vcgt.f32 q3, q1, q0			    \n"
			"vcgt.f32 q4, q2, q0			    \n"
			"vbsl q3, %q[pos], %q[neg]		    \n"
			"vbsl q4, %q[pos], %q[neg]		    \n"
			"vadd.f32 q1, q1, q3			    \n"
			"vadd.f32 q2, q2, q4			    \n"
			"vcvt.s32.f32 q1, q1, #15		    \n"
			"vcvt.s32.f32 q2, q2, #15		    \n"
			"vqmovn.s32 d2, q1			    \n"
			"vqmovn.s32 d3, q2			    \n"
			"vst2.16 {d2-d3}, [%[output]]!		    \n"
			"bne 1b					    \n"
			: /* output */
			  [chunk]"+r"(blocks),
			  [input1]"+r"(input1),
			  [input2]"+r"(input2),
			  [output]"+r"(output)
			: /* input */
			  [pos]"w"(half_up),
			  [neg]"w"(half_down)
			: /* clobber */
			  "q0", "q1", "q2", "q3", "q4", "memory", "cc");
		// clang-format on
	}

	/* Scalar conversion of the remaining frames: scale, push half a step
	 * away from zero, truncate, then clamp to the int16 range.
	 */
	for (i = 0; i < tail; i++) {
		float f;
		int v;

		f = *input1++ * 32768.0f;
		if (f >= 0)
			f += 0.5f;
		else
			f += -0.5f;
		v = (int)(f);
		if (v > 32767)
			v = 32767;
		if (v < -32768)
			v = -32768;
		*output++ = v;

		f = *input2++ * 32768.0f;
		if (f >= 0)
			f += 0.5f;
		else
			f += -0.5f;
		v = (int)(f);
		if (v > 32767)
			v = 32767;
		if (v < -32768)
			v = -32768;
		*output++ = v;
	}
}
257 #define interleave_stereo interleave_stereo
258 #endif
259 
260 #ifdef __SSE3__
261 #include <emmintrin.h>
262 
/* SSE implementation: splits interleaved stereo int16 samples
 * (L0 R0 L1 R1 ...) into two float channels in the range -1.0f to 1.0f.
 *
 * Every 32-bit lane loaded from input holds one frame.  pslld and psrad
 * isolate its low and high word, each ending up in a different range:
 *  - the low word is moved to the high bits (0x80000000 .. 0x7fff0000);
 *  - the high word is moved to the low bits (0x00008000 .. 0x00007fff).
 * cvtdq2ps converts those ints to floats as is, and mulps then normalizes
 * both, using a different scale for each because of the differing ranges.
 */
static void deinterleave_stereo(int16_t *input, float *output1, float *output2,
				int frames)
{
	int blocks = frames >> 3; /* full groups of 8 frames for the SIMD loop */
	int tail = frames & 7; /* leftover frames, converted one by one */
	int i;

	/* Each pass handles 8 frames (16 samples):
	 * L0 R0 L1 R1 L2 R2 L3 R3... -> L0 L1 L2 L3... R0 R1 R2 R3...
	 */
	if (blocks != 0) {
		// clang-format off
		__asm__ __volatile__(
			"1:                                         \n"
			"lddqu (%[input]), %%xmm0                   \n"
			"lddqu 16(%[input]), %%xmm1                 \n"
			"add $32, %[input]                          \n"
			"movdqa %%xmm0, %%xmm2                      \n"
			"movdqa %%xmm1, %%xmm3                      \n"
			"pslld $16, %%xmm0                          \n"
			"pslld $16, %%xmm1                          \n"
			"psrad $16, %%xmm2                          \n"
			"psrad $16, %%xmm3                          \n"
			"cvtdq2ps %%xmm0, %%xmm0                    \n"
			"cvtdq2ps %%xmm1, %%xmm1                    \n"
			"cvtdq2ps %%xmm2, %%xmm2                    \n"
			"cvtdq2ps %%xmm3, %%xmm3                    \n"
			"mulps %[scale_2_n31], %%xmm0               \n"
			"mulps %[scale_2_n31], %%xmm1               \n"
			"mulps %[scale_2_n15], %%xmm2               \n"
			"mulps %[scale_2_n15], %%xmm3               \n"
			"movdqu %%xmm0, (%[output1])                \n"
			"movdqu %%xmm1, 16(%[output1])              \n"
			"movdqu %%xmm2, (%[output2])                \n"
			"movdqu %%xmm3, 16(%[output2])              \n"
			"add $32, %[output1]                        \n"
			"add $32, %[output2]                        \n"
			"sub $1, %[chunk]                           \n"
			"jnz 1b                                     \n"
			: /* output */
			  [chunk]"+r"(blocks),
			  [input]"+r"(input),
			  [output1]"+r"(output1),
			  [output2]"+r"(output2)
			: /* input */
			  [scale_2_n31]"x"(_mm_set1_ps(1.0f/(1<<15)/(1<<16))),
			  [scale_2_n15]"x"(_mm_set1_ps(1.0f/(1<<15)))
			: /* clobber */
			  "xmm0", "xmm1", "xmm2", "xmm3", "memory", "cc");
		// clang-format on
	}

	/* Scalar conversion of the frames the SIMD loop did not cover. */
	for (i = 0; i < tail; i++) {
		*output1++ = *input++ / 32768.0f;
		*output2++ = *input++ / 32768.0f;
	}
}
328 #define deinterleave_stereo deinterleave_stereo
329 
/* SSE implementation: converts two float channels (nominal range -1.0f to
 * 1.0f) to interleaved shorts in the range -32768 to 32767.
 *
 * SIMD path:
 *  - unpcklps/unpckhps interleave the two channels.
 *  - paddsw adds 15 to the float exponent, i.e. multiplies by 32768; the
 *    add saturates on the 16-bit word that carries sign and exponent.
 *  - minps clamps every lane to 32767.0f before the conversion.  Without
 *    it, positive values beyond the int range come out of cvtps2dq as
 *    0x80000000, which packssdw then turns into -32768 (a sign flip).
 *    minps returns its source operand when the destination lane is NaN, so
 *    lanes whose exponent saturated into a NaN pattern also become 32767.
 *  - cvtps2dq rounds to nearest with ties to even (assuming the default
 *    MXCSR rounding mode).
 *  - Negative values beyond the int range become 0x80000000 in cvtps2dq and
 *    packssdw clamps them to -32768.
 *
 * The scalar tail rounds ties away from zero and clamps in floating point
 * before converting, so out-of-range samples keep their sign there too.
 */
static void interleave_stereo(float *input1, float *input2, int16_t *output,
			      int frames)
{
	/* Process 4 frames (8 samples) each loop. */
	/* L0 L1 L2 L3, R0 R1 R2 R3 -> L0 R0 L1 R1, L2 R2 L3 R3 */
	int chunk = frames >> 2;
	frames &= 3;

	if (chunk) {
		// clang-format off
		__asm__ __volatile__(
			"1:                                         \n"
			"lddqu (%[input1]), %%xmm0                  \n"
			"lddqu (%[input2]), %%xmm2                  \n"
			"add $16, %[input1]                         \n"
			"add $16, %[input2]                         \n"
			"movaps %%xmm0, %%xmm1                      \n"
			"unpcklps %%xmm2, %%xmm0                    \n"
			"unpckhps %%xmm2, %%xmm1                    \n"
			"paddsw %[scale_2_15], %%xmm0               \n"
			"paddsw %[scale_2_15], %%xmm1               \n"
			"minps %[clamp_large], %%xmm0               \n"
			"minps %[clamp_large], %%xmm1               \n"
			"cvtps2dq %%xmm0, %%xmm0                    \n"
			"cvtps2dq %%xmm1, %%xmm1                    \n"
			"packssdw %%xmm1, %%xmm0                    \n"
			"movdqu %%xmm0, (%[output])                 \n"
			"add $16, %[output]                         \n"
			"sub $1, %[chunk]                           \n"
			"jnz 1b                                     \n"
			: /* output */
			  [chunk]"+r"(chunk),
			  [input1]"+r"(input1),
			  [input2]"+r"(input2),
			  [output]"+r"(output)
			: /* input */
			  [scale_2_15]"x"(_mm_set1_epi32(15 << 23)),
			  [clamp_large]"x"(_mm_set1_ps(32767.0f))
			: /* clobber */
			  "xmm0", "xmm1", "xmm2", "memory", "cc");
		// clang-format on
	}

	/* The remaining samples */
	while (frames--) {
		float f;
		int v;

		f = *input1++ * 32768.0f;
		f += (f >= 0) ? 0.5f : -0.5f;
		/* Clamp before converting: casting an out-of-range float to
		 * int is undefined.  The negated test also catches NaN.
		 */
		if (!(f < 32767.0f))
			v = 32767;
		else if (f <= -32768.0f)
			v = -32768;
		else
			v = (int)f;
		*output++ = v;

		f = *input2++ * 32768.0f;
		f += (f >= 0) ? 0.5f : -0.5f;
		if (!(f < 32767.0f))
			v = 32767;
		else if (f <= -32768.0f)
			v = -32768;
		else
			v = (int)f;
		*output++ = v;
	}
}
388 #define interleave_stereo interleave_stereo
389 #endif
390 
/* Splits interleaved S16_LE samples into per-channel float buffers, dividing
 * every sample by 32768.  input holds frames * channels samples in frame
 * order; output[c] receives frames floats for channel c.  When a
 * deinterleave_stereo implementation has been defined, two-channel data is
 * handed to it instead.
 */
static void dsp_util_deinterleave_s16le(int16_t *input, float *const *output,
					int channels, int frames)
{
	int frame, ch;

#ifdef deinterleave_stereo
	if (channels == 2) {
		deinterleave_stereo(input, output[0], output[1], frames);
		return;
	}
#endif

	/* Walk the input sequentially and scatter into the channel buffers. */
	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++) {
			output[ch][frame] = *input / 32768.0f;
			input++;
		}
	}
}
411 
/* Splits interleaved S24_LE samples (24 significant bits stored in the low
 * three bytes of a 32-bit word) into per-channel float buffers.  input holds
 * frames * channels words in frame order; output[c] receives frames floats
 * for channel c.  The top byte of every word is discarded.
 */
static void dsp_util_deinterleave_s24le(int32_t *input, float *const *output,
					int channels, int frames)
{
	int i, j;

	for (i = 0; i < frames; i++)
		for (j = 0; j < channels; j++, input++) {
			/* Move the 24-bit value to the top of the word so that
			 * its sign bit becomes the int32 sign bit.  The shift
			 * is done on an unsigned value: left-shifting a
			 * negative or overflowing signed int is undefined.
			 */
			int32_t sample = (int32_t)((uint32_t)*input << 8);

			output[j][i] = sample / 2147483648.0f;
		}
}
425 
/* Splits interleaved S24_3LE samples (packed 3-byte little-endian values)
 * into per-channel float buffers.  input holds frames * channels * 3 bytes
 * in frame order; output[c] receives frames floats for channel c.
 */
static void dsp_util_deinterleave_s243le(uint8_t *input, float *const *output,
					 int channels, int frames)
{
	int i, j;

	for (i = 0; i < frames; i++)
		for (j = 0; j < channels; j++, input += 3) {
			/* Place the three bytes in the upper 24 bits of a
			 * 32-bit word, least significant byte first.  Using
			 * shifts rather than a byte copy keeps the result
			 * independent of the host byte order.
			 */
			uint32_t bits = ((uint32_t)input[0] << 8) |
					((uint32_t)input[1] << 16) |
					((uint32_t)input[2] << 24);
			int32_t sample = (int32_t)bits;

			output[j][i] = sample / 2147483648.0f;
		}
}
443 
/* Splits interleaved S32_LE samples into per-channel float buffers, dividing
 * every sample by 2^31.  input holds frames * channels samples in frame
 * order; output[c] receives frames floats for channel c.
 */
static void dsp_util_deinterleave_s32le(int32_t *input, float *const *output,
					int channels, int frames)
{
	int frame, ch;

	/* Walk the input sequentially and scatter into the channel buffers. */
	for (frame = 0; frame < frames; frame++) {
		for (ch = 0; ch < channels; ch++) {
			output[ch][frame] = *input / 2147483648.0f;
			input++;
		}
	}
}
457 
/* Converts interleaved PCM data to per-channel float buffers by dispatching
 * on the sample format.  S16_LE, S24_LE, S24_3LE and S32_LE are handled;
 * for any other format an error is logged and nothing is converted.
 * Returns 0 on success, -EINVAL for an unsupported format.
 */
int dsp_util_deinterleave(uint8_t *input, float *const *output, int channels,
			  snd_pcm_format_t format, int frames)
{
	if (format == SND_PCM_FORMAT_S16_LE) {
		dsp_util_deinterleave_s16le((int16_t *)input, output, channels,
					    frames);
		return 0;
	}

	if (format == SND_PCM_FORMAT_S24_LE) {
		dsp_util_deinterleave_s24le((int32_t *)input, output, channels,
					    frames);
		return 0;
	}

	if (format == SND_PCM_FORMAT_S24_3LE) {
		dsp_util_deinterleave_s243le(input, output, channels, frames);
		return 0;
	}

	if (format == SND_PCM_FORMAT_S32_LE) {
		dsp_util_deinterleave_s32le((int32_t *)input, output, channels,
					    frames);
		return 0;
	}

	syslog(LOG_ERR, "Invalid format to deinterleave");
	return -EINVAL;
}
483 
/* Converts per-channel float buffers (nominal range -1.0f to 1.0f) to
 * interleaved S16_LE samples, rounding to nearest with ties away from zero
 * and clamping to -32768..32767.  input[c] holds frames floats for channel
 * c; output receives frames * channels samples in frame order.  When an
 * interleave_stereo implementation has been defined, two-channel data is
 * handed to it instead.
 */
static void dsp_util_interleave_s16le(float *const *input, int16_t *output,
				      int channels, int frames)
{
	int i, j;

#ifdef interleave_stereo
	if (channels == 2) {
		interleave_stereo(input[0], input[1], output, frames);
		return;
	}
#endif

	for (i = 0; i < frames; i++)
		for (j = 0; j < channels; j++) {
			float f = input[j][i] * 32768.0f;
			int16_t sample;

			f += (f >= 0) ? 0.5f : -0.5f;
			/* Clamp while still in floating point: converting an
			 * out-of-range float to an integer is undefined and
			 * turned loud positive samples into -32768 on x86.
			 */
			if (f >= 32767.0f)
				sample = 32767;
			else if (f <= -32768.0f)
				sample = -32768;
			else if (f == f)
				sample = (int16_t)f;
			else
				sample = 0; /* NaN carries no usable value */
			*output++ = sample;
		}
}
507 
/* Converts per-channel float buffers (nominal range -1.0f to 1.0f) to
 * interleaved S24_LE samples: 24 significant bits in the low three bytes of
 * each 32-bit word, top byte zero.  input[c] holds frames floats for channel
 * c; output receives frames * channels words in frame order.
 */
static void dsp_util_interleave_s24le(float *const *input, int32_t *output,
				      int channels, int frames)
{
	int i, j;

	for (i = 0; i < frames; i++)
		for (j = 0; j < channels; j++, output++) {
			float f = input[j][i] * 2147483648.0f;
			int32_t v;

			f += (f >= 0) ? 0.5f : -0.5f;
			/* (float)INT_MAX rounds up to 2^31, which does not fit
			 * in an int32, so a clamp against it still lets an
			 * unconvertible value through.  Compare against the
			 * float limits and pick the integer limit directly.
			 */
			if (f >= 2147483648.0f)
				v = INT_MAX;
			else if (f <= -2147483648.0f)
				v = INT_MIN;
			else if (f == f)
				v = (int32_t)f;
			else
				v = 0; /* NaN carries no usable value */
			/* Keep the upper 24 bits, stored in the low 3 bytes. */
			*output = (int32_t)(((uint32_t)v >> 8) & 0x00ffffff);
		}
}
525 
/* Converts per-channel float buffers (nominal range -1.0f to 1.0f) to
 * interleaved S24_3LE samples (packed 3-byte little-endian values).
 * input[c] holds frames floats for channel c; output receives
 * frames * channels * 3 bytes in frame order.
 */
static void dsp_util_interleave_s243le(float *const *input, uint8_t *output,
				       int channels, int frames)
{
	int i, j;

	for (i = 0; i < frames; i++)
		for (j = 0; j < channels; j++, output += 3) {
			float f = input[j][i] * 2147483648.0f;
			int32_t v;
			uint32_t bits;

			f += (f >= 0) ? 0.5f : -0.5f;
			/* (float)INT_MAX rounds up to 2^31, which does not fit
			 * in an int32, so a clamp against it still lets an
			 * unconvertible value through.  Compare against the
			 * float limits and pick the integer limit directly.
			 */
			if (f >= 2147483648.0f)
				v = INT_MAX;
			else if (f <= -2147483648.0f)
				v = INT_MIN;
			else if (f == f)
				v = (int32_t)f;
			else
				v = 0; /* NaN carries no usable value */

			/* Emit the upper 24 bits, least significant byte
			 * first.  Shifts keep this independent of the host
			 * byte order.
			 */
			bits = (uint32_t)v;
			output[0] = (uint8_t)(bits >> 8);
			output[1] = (uint8_t)(bits >> 16);
			output[2] = (uint8_t)(bits >> 24);
		}
}
545 
/* Converts per-channel float buffers (nominal range -1.0f to 1.0f) to
 * interleaved S32_LE samples, clamping to the int32 range.  input[c] holds
 * frames floats for channel c; output receives frames * channels samples in
 * frame order.
 */
static void dsp_util_interleave_s32le(float *const *input, int32_t *output,
				      int channels, int frames)
{
	int i, j;

	for (i = 0; i < frames; i++)
		for (j = 0; j < channels; j++, output++) {
			float f = input[j][i] * 2147483648.0f;

			f += (f >= 0) ? 0.5f : -0.5f;
			/* (float)INT_MAX rounds up to 2^31, which does not fit
			 * in an int32, so a clamp against it still lets an
			 * unconvertible value through.  Compare against the
			 * float limits and pick the integer limit directly.
			 */
			if (f >= 2147483648.0f)
				*output = INT_MAX;
			else if (f <= -2147483648.0f)
				*output = INT_MIN;
			else if (f == f)
				*output = (int32_t)f;
			else
				*output = 0; /* NaN carries no usable value */
		}
}
562 
/* Converts per-channel float buffers to interleaved PCM data by dispatching
 * on the sample format.  S16_LE, S24_LE, S24_3LE and S32_LE are handled;
 * for any other format an error is logged and nothing is converted.
 * Returns 0 on success, -EINVAL for an unsupported format.
 */
int dsp_util_interleave(float *const *input, uint8_t *output, int channels,
			snd_pcm_format_t format, int frames)
{
	if (format == SND_PCM_FORMAT_S16_LE) {
		dsp_util_interleave_s16le(input, (int16_t *)output, channels,
					  frames);
		return 0;
	}

	if (format == SND_PCM_FORMAT_S24_LE) {
		dsp_util_interleave_s24le(input, (int32_t *)output, channels,
					  frames);
		return 0;
	}

	if (format == SND_PCM_FORMAT_S24_3LE) {
		dsp_util_interleave_s243le(input, output, channels, frames);
		return 0;
	}

	if (format == SND_PCM_FORMAT_S32_LE) {
		dsp_util_interleave_s32le(input, (int32_t *)output, channels,
					  frames);
		return 0;
	}

	syslog(LOG_ERR, "Invalid format to interleave");
	return -EINVAL;
}
588 
/* Sets the floating point control bits of the calling thread so that
 * denormal numbers are flushed to zero.  On architectures other than x86,
 * aarch64 and arm this compiles to a no-op and a build warning is issued.
 */
void dsp_enable_flush_denormal_to_zero(void)
{
#if defined(__i386__) || defined(__x86_64__)
	/* 0x8040 sets MXCSR bit 15 (flush-to-zero) and bit 6
	 * (denormals-are-zero).
	 */
	unsigned int mxcsr;
	mxcsr = __builtin_ia32_stmxcsr();
	__builtin_ia32_ldmxcsr(mxcsr | 0x8040);
#elif defined(__aarch64__)
	/* 0x1000000 is bit 24 of FPCR (flush-to-zero mode). */
	uint64_t cw;
	__asm__ __volatile__("mrs    %0, fpcr			    \n"
			     "orr    %0, %0, #0x1000000		    \n"
			     "msr    fpcr, %0			    \n"
			     "isb					    \n"
			     : "=r"(cw)::"memory");
#elif defined(__arm__)
	/* 0x1000000 is bit 24 of FPSCR (flush-to-zero mode). */
	uint32_t cw;
	__asm__ __volatile__("vmrs   %0, fpscr			    \n"
			     "orr    %0, %0, #0x1000000		    \n"
			     "vmsr   fpscr, %0			    \n"
			     : "=r"(cw)::"memory");
#else
#warning "Don't know how to disable denorms. Performance may suffer."
#endif
}
612