• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_AUDIO_PRIMITIVES_H
18 #define ANDROID_AUDIO_PRIMITIVES_H
19 
20 #include <math.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <sys/cdefs.h>
24 
25 /** \cond */
26 __BEGIN_DECLS
27 /** \endcond */
28 
29 /**
30  * \file primitives.h
31  * The memcpy_* conversion routines are designed to work in-place on same dst as src
32  * buffers only if the types shrink on copy, with the exception of memcpy_to_i16_from_u8().
33  * This allows the loops to go upwards for faster cache access (and may be more flexible
34  * for future optimization later).
35  */
36 
37 /**
38  * Deprecated. Use memcpy_to_i16_from_q4_27() instead (double the pairs for the count).
39  * Neither this function nor memcpy_to_i16_from_q4_27() actually dither.
40  *
41  * Dither and clamp pairs of 32-bit input samples (sums) to 16-bit output samples (out).
42  * Each 32-bit input sample can be viewed as a signed fixed-point Q19.12 of which the
43  * .12 fraction bits are dithered and the 19 integer bits are clamped to signed 16 bits.
44  * Alternatively the input can be viewed as Q4.27, of which the lowest .12 of the fraction
45  * is dithered and the remaining fraction is converted to the output Q.15, with clamping
46  * on the 4 integer guard bits.
47  *
48  * For interleaved stereo, pairs is the number of sample pairs,
49  * and out is an array of interleaved pairs of 16-bit samples per channel.
50  * For mono, pairs is the number of samples / 2, and out is an array of 16-bit samples.
51  * The name "dither" is a misnomer; the current implementation does not actually dither
52  * but uses truncation.  This may change.
53  * The out and sums buffers must either be completely separate (non-overlapping), or
54  * they must both start at the same address.  Partially overlapping buffers are not supported.
55  */
56 void ditherAndClamp(int32_t *out, const int32_t *sums, size_t pairs);
57 
58 /**
59  * Copy samples from signed fixed-point 32-bit Q4.27 to 16-bit Q0.15
60  *
61  *  \param dst     Destination buffer
62  *  \param src     Source buffer
63  *  \param count   Number of samples to copy
64  *
65  * The destination and source buffers must either be completely separate (non-overlapping), or
66  * they must both start at the same address.  Partially overlapping buffers are not supported.
67  */
68 void memcpy_to_i16_from_q4_27(int16_t *dst, const int32_t *src, size_t count);
69 
70 /**
71  * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 16-bit.
72  *
73  *  \param dst     Destination buffer
74  *  \param src     Source buffer
75  *  \param count   Number of samples to copy
76  *
77  * The destination and source buffers must either be completely separate (non-overlapping), or
78  * they must both start at the same address.  Partially overlapping buffers are not supported.
79  */
80 void memcpy_to_i16_from_u8(int16_t *dst, const uint8_t *src, size_t count);
81 
82 /**
83  * Shrink and copy samples from signed 16-bit to unsigned 8-bit offset by 0x80.
84  *
85  *  \param dst     Destination buffer
86  *  \param src     Source buffer
87  *  \param count   Number of samples to copy
88  *
89  * The destination and source buffers must either be completely separate (non-overlapping), or
90  * they must both start at the same address.  Partially overlapping buffers are not supported.
91  * The conversion is done by truncation, without dithering, so it loses resolution.
92  */
93 void memcpy_to_u8_from_i16(uint8_t *dst, const int16_t *src, size_t count);
94 
95 /**
96  * Copy samples from float to unsigned 8-bit offset by 0x80.
97  *
98  *  \param dst     Destination buffer
99  *  \param src     Source buffer
100  *  \param count   Number of samples to copy
101  *
102  * The destination and source buffers must either be completely separate (non-overlapping), or
103  * they must both start at the same address.  Partially overlapping buffers are not supported.
104  * The conversion is done by truncation, without dithering, so it loses resolution.
105  */
106 void memcpy_to_u8_from_float(uint8_t *dst, const float *src, size_t count);
107 
108 /**
109  * Shrink and copy samples from signed 32-bit fixed-point Q0.31 to signed 16-bit Q0.15.
110  *
111  *  \param dst     Destination buffer
112  *  \param src     Source buffer
113  *  \param count   Number of samples to copy
114  *
115  * The destination and source buffers must either be completely separate (non-overlapping), or
116  * they must both start at the same address.  Partially overlapping buffers are not supported.
117  * The conversion is done by truncation, without dithering, so it loses resolution.
118  */
119 void memcpy_to_i16_from_i32(int16_t *dst, const int32_t *src, size_t count);
120 
121 /**
122  * Shrink and copy samples from single-precision floating-point to signed 16-bit.
123  * Each float should be in the range -1.0 to 1.0.  Values outside that range are clamped,
124  * refer to clamp16_from_float().
125  *
126  *  \param dst     Destination buffer
127  *  \param src     Source buffer
128  *  \param count   Number of samples to copy
129  *
130  * The destination and source buffers must either be completely separate (non-overlapping), or
131  * they must both start at the same address.  Partially overlapping buffers are not supported.
132  * The conversion is done by truncation, without dithering, so it loses resolution.
133  */
134 void memcpy_to_i16_from_float(int16_t *dst, const float *src, size_t count);
135 
136 /**
137  * Copy samples from signed fixed-point 32-bit Q4.27 to single-precision floating-point.
138  * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
139  * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].  Note the closed range
140  * at 1.0 and 16.0 is due to rounding on conversion to float. See float_from_q4_27() for details.
141  *
142  *  \param dst     Destination buffer
143  *  \param src     Source buffer
144  *  \param count   Number of samples to copy
145  *
146  * The destination and source buffers must either be completely separate (non-overlapping), or
147  * they must both start at the same address.  Partially overlapping buffers are not supported.
148  */
149 void memcpy_to_float_from_q4_27(float *dst, const int32_t *src, size_t count);
150 
151 /**
152  * Copy samples from signed fixed-point 16 bit Q0.15 to single-precision floating-point.
153  * The output float range is [-1.0, 1.0) for the fixed-point range [0x8000, 0x7fff].
154  * No rounding is needed as the representation is exact.
155  *
156  *  \param dst     Destination buffer
157  *  \param src     Source buffer
158  *  \param count   Number of samples to copy
159  *
160  * The destination and source buffers must either be completely separate (non-overlapping), or
161  * they must both start at the same address.  Partially overlapping buffers are not supported.
162  */
163 void memcpy_to_float_from_i16(float *dst, const int16_t *src, size_t count);
164 
165 /**
166  * Copy samples from unsigned fixed-point 8 bit to single-precision floating-point.
167  * The output float range is [-1.0, 1.0) for the fixed-point range [0x00, 0xFF].
168  * No rounding is needed as the representation is exact.
169  *
170  *  \param dst     Destination buffer
171  *  \param src     Source buffer
172  *  \param count   Number of samples to copy
173  *
174  * The destination and source buffers must either be completely separate (non-overlapping), or
175  * they must both start at the same address.  Partially overlapping buffers are not supported.
176  */
177 void memcpy_to_float_from_u8(float *dst, const uint8_t *src, size_t count);
178 
179 /**
180  * Copy samples from signed fixed-point packed 24 bit Q0.23 to single-precision floating-point.
181  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
182  * The output float range is [-1.0, 1.0) for the fixed-point range [0x800000, 0x7fffff].
183  * No rounding is needed as the representation is exact.
184  *
185  *  \param dst     Destination buffer
186  *  \param src     Source buffer
187  *  \param count   Number of samples to copy
188  *
189  * The destination and source buffers must either be completely separate (non-overlapping), or
190  * they must both start at the same address.  Partially overlapping buffers are not supported.
191  */
192 void memcpy_to_float_from_p24(float *dst, const uint8_t *src, size_t count);
193 
194 /**
195  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed point 16 bit Q0.15.
196  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
197  * The data is truncated without rounding.
198  *
199  *  \param dst     Destination buffer
200  *  \param src     Source buffer
201  *  \param count   Number of samples to copy
202  *
203  * The destination and source buffers must either be completely separate (non-overlapping), or
204  * they must both start at the same address.  Partially overlapping buffers are not supported.
205  */
206 void memcpy_to_i16_from_p24(int16_t *dst, const uint8_t *src, size_t count);
207 
208 /**
209  * Copy samples from signed fixed-point packed 24 bit Q0.23 to signed fixed-point 32-bit Q0.31.
210  * The packed 24 bit input is stored in native endian format in a uint8_t byte array.
211  * The output data range is [0x80000000, 0x7fffff00] at intervals of 0x100.
212  *
213  *  \param dst     Destination buffer
214  *  \param src     Source buffer
215  *  \param count   Number of samples to copy
216  *
217  * The destination and source buffers must either be completely separate (non-overlapping), or
218  * they must both start at the same address.  Partially overlapping buffers are not supported.
219  */
220 void memcpy_to_i32_from_p24(int32_t *dst, const uint8_t *src, size_t count);
221 
222 /**
223  * Copy samples from signed fixed point 16 bit Q0.15 to signed fixed-point packed 24 bit Q0.23.
224  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
225  * The output data range is [0x800000, 0x7fff00] (not full).
226  * Nevertheless there is no DC offset on the output, if the input has no DC offset.
227  *
228  *  \param dst     Destination buffer
229  *  \param src     Source buffer
230  *  \param count   Number of samples to copy
231  *
232  * The destination and source buffers must either be completely separate (non-overlapping), or
233  * they must both start at the same address.  Partially overlapping buffers are not supported.
234  */
235 void memcpy_to_p24_from_i16(uint8_t *dst, const int16_t *src, size_t count);
236 
237 /**
238  * Copy samples from single-precision floating-point to signed fixed-point packed 24 bit Q0.23.
239  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
240  * The data is clamped and rounded to nearest, ties away from zero. See clamp24_from_float()
241  * for details.
242  *
243  *  \param dst     Destination buffer
244  *  \param src     Source buffer
245  *  \param count   Number of samples to copy
246  *
247  * The destination and source buffers must either be completely separate (non-overlapping), or
248  * they must both start at the same address.  Partially overlapping buffers are not supported.
249  */
250 void memcpy_to_p24_from_float(uint8_t *dst, const float *src, size_t count);
251 
252 /**
253  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed-point packed 24 bit Q0.23.
254  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
255  * The data is clamped to the range [0x800000, 0x7fffff].
256  *
257  *  \param dst     Destination buffer
258  *  \param src     Source buffer
259  *  \param count   Number of samples to copy
260  *
261  * The destination and source buffers must either be completely separate (non-overlapping), or
262  * they must both start at the same address.  Partially overlapping buffers are not supported.
263  */
264 void memcpy_to_p24_from_q8_23(uint8_t *dst, const int32_t *src, size_t count);
265 
266 /**
267  * Shrink and copy samples from signed 32-bit fixed-point Q0.31
268  * to signed fixed-point packed 24 bit Q0.23.
269  * The packed 24 bit output is assumed to be a native-endian uint8_t byte array.
270  *
271  *  \param dst     Destination buffer
272  *  \param src     Source buffer
273  *  \param count   Number of samples to copy
274  *
275  * The destination and source buffers must either be completely separate (non-overlapping), or
276  * they must both start at the same address.  Partially overlapping buffers are not supported.
277  * The conversion is done by truncation, without dithering, so it loses resolution.
278  */
279 void memcpy_to_p24_from_i32(uint8_t *dst, const int32_t *src, size_t count);
280 
281 /**
282  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q8.23.
283  * The output data range is [0xff800000, 0x007fff00] at intervals of 0x100.
284  *
285  *  \param dst     Destination buffer
286  *  \param src     Source buffer
287  *  \param count   Number of samples to copy
288  *
289  * The destination and source buffers must either be completely separate (non-overlapping), or
290  * they must both start at the same address.  Partially overlapping buffers are not supported.
291  */
292 void memcpy_to_q8_23_from_i16(int32_t *dst, const int16_t *src, size_t count);
293 
294 /**
295  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q8.23.
296  * This copy will clamp the Q8.23 representation to [0xff800000, 0x007fffff] even though there
297  * are guard bits available. Fractional lsb is rounded to nearest, ties away from zero.
298  * See clamp24_from_float() for details.
299  *
300  *  \param dst     Destination buffer
301  *  \param src     Source buffer
302  *  \param count   Number of samples to copy
303  *
304  * The destination and source buffers must either be completely separate (non-overlapping), or
305  * they must both start at the same address.  Partially overlapping buffers are not supported.
306  */
307 void memcpy_to_q8_23_from_float_with_clamp(int32_t *dst, const float *src, size_t count);
308 
309 /**
310  * Copy samples from signed fixed point packed 24-bit Q0.23 to signed fixed-point 32-bit Q8.23.
311  * The output data range is [0xff800000, 0x007fffff].
312  *
313  *  \param dst     Destination buffer
314  *  \param src     Source buffer
315  *  \param count   Number of samples to copy
316  *
317  * The destination and source buffers must either be completely separate (non-overlapping), or
318  * they must both start at the same address.  Partially overlapping buffers are not supported.
319  */
320 void memcpy_to_q8_23_from_p24(int32_t *dst, const uint8_t *src, size_t count);
321 
322 /**
323  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q4.27.
324  * The conversion will use the full available Q4.27 range, including guard bits.
325  * Fractional lsb is rounded to nearest, ties away from zero.
326  * See clampq4_27_from_float() for details.
327  *
328  *  \param dst     Destination buffer
329  *  \param src     Source buffer
330  *  \param count   Number of samples to copy
331  *
332  * The destination and source buffers must either be completely separate (non-overlapping), or
333  * they must both start at the same address.  Partially overlapping buffers are not supported.
334  */
335 void memcpy_to_q4_27_from_float(int32_t *dst, const float *src, size_t count);
336 
337 /**
338  * Copy samples from signed fixed-point 32-bit Q8.23 to signed fixed point 16-bit Q0.15.
339  * The data is clamped, and truncated without rounding.
340  *
341  *  \param dst     Destination buffer
342  *  \param src     Source buffer
343  *  \param count   Number of samples to copy
344  *
345  * The destination and source buffers must either be completely separate (non-overlapping), or
346  * they must both start at the same address.  Partially overlapping buffers are not supported.
347  */
348 void memcpy_to_i16_from_q8_23(int16_t *dst, const int32_t *src, size_t count);
349 
350 /**
351  * Copy samples from signed fixed-point 32-bit Q8.23 to single-precision floating-point.
352  * The nominal output float range is [-1.0, 1.0) for the fixed-point
353  * range [0xff800000, 0x007fffff]. The maximum output float range is [-256.0, 256.0).
354  * No rounding is needed as the representation is exact for nominal values.
355  * Rounding for overflow values is to nearest, ties to even.
356  *
357  *  \param dst     Destination buffer
358  *  \param src     Source buffer
359  *  \param count   Number of samples to copy
360  *
361  * The destination and source buffers must either be completely separate (non-overlapping), or
362  * they must both start at the same address.  Partially overlapping buffers are not supported.
363  */
364 void memcpy_to_float_from_q8_23(float *dst, const int32_t *src, size_t count);
365 
366 /**
367  * Expand and copy samples from unsigned 8-bit offset by 0x80 to signed 32-bit.
368  *
369  *  \param dst     Destination buffer
370  *  \param src     Source buffer
371  *  \param count   Number of samples to copy
372  *
373  * The destination and source buffers must either be completely separate (non-overlapping), or
374  * they must both start at the same address.  Partially overlapping buffers are not supported.
375  */
376 void memcpy_to_i32_from_u8(int32_t *dst, const uint8_t *src, size_t count);
377 
378 /**
379  * Copy samples from signed fixed point 16-bit Q0.15 to signed fixed-point 32-bit Q0.31.
380  * The output data range is [0x80000000, 0x7fff0000] at intervals of 0x10000.
381  *
382  *  \param dst     Destination buffer
383  *  \param src     Source buffer
384  *  \param count   Number of samples to copy
385  *
386  * The destination and source buffers must either be completely separate (non-overlapping), or
387  * they must both start at the same address.  Partially overlapping buffers are not supported.
388  */
389 void memcpy_to_i32_from_i16(int32_t *dst, const int16_t *src, size_t count);
390 
391 /**
392  * Copy samples from single-precision floating-point to signed fixed-point 32-bit Q0.31.
393  * If rounding is needed on truncation, the fractional lsb is rounded to nearest,
394  * ties away from zero. See clamp32_from_float() for details.
395  *
396  *  \param dst     Destination buffer
397  *  \param src     Source buffer
398  *  \param count   Number of samples to copy
399  *
400  * The destination and source buffers must either be completely separate (non-overlapping), or
401  * they must both start at the same address.  Partially overlapping buffers are not supported.
402  */
403 void memcpy_to_i32_from_float(int32_t *dst, const float *src, size_t count);
404 
405 /**
406  * Copy samples from signed fixed-point 32-bit Q0.31 to single-precision floating-point.
407  * The float range is [-1.0, 1.0] for the fixed-point range [0x80000000, 0x7fffffff].
408  * Rounding is done according to float_from_i32().
409  *
410  *  \param dst     Destination buffer
411  *  \param src     Source buffer
412  *  \param count   Number of samples to copy
413  *
414  * The destination and source buffers must either be completely separate (non-overlapping), or
415  * they must both start at the same address.  Partially overlapping buffers are not supported.
416  */
417 void memcpy_to_float_from_i32(float *dst, const int32_t *src, size_t count);
418 
419 /**
420  * Copy samples from unrestricted float to range restricted float [-absMax, absMax].
421  * Any float sample not in the range [-absMax, absMax] will be clamped in this range.
422  *
423  *  \param dst     Destination buffer
424  *  \param src     Source buffer
425  *  \param count   Number of samples to copy
426  *  \param absMax  Maximum of the absolute value of the copied samples.
427  *
428  * The destination and source buffers must either be completely separate (non-overlapping), or
429  * they must both start at the same address.  Partially overlapping buffers are not supported.
430  * Note: NAN is clamped to absMax and not 0 for performance reasons (~2x faster).
431  */
432 void memcpy_to_float_from_float_with_clamping(float *dst, const float *src, size_t count,
433                                               float absMax);
434 
435 /**
436  * Downmix pairs of interleaved stereo input 16-bit samples to mono output 16-bit samples.
437  *
438  *  \param dst     Destination buffer
439  *  \param src     Source buffer
440  *  \param count   Number of stereo frames to downmix
441  *
442  * The destination and source buffers must be completely separate (non-overlapping).
443  * The current implementation truncates the mean rather than dither, but this may change.
444  */
445 void downmix_to_mono_i16_from_stereo_i16(int16_t *dst, const int16_t *src, size_t count);
446 
447 /**
448  * Upmix mono input 16-bit samples to pairs of interleaved stereo output 16-bit samples by
449  * duplicating.
450  *
451  *  \param dst     Destination buffer
452  *  \param src     Source buffer
453  *  \param count   Number of mono samples to upmix
454  *
455  * The destination and source buffers must either be completely separate (non-overlapping), or
456  * they must both start at the same address.  Partially overlapping buffers are not supported.
457  */
458 void upmix_to_stereo_i16_from_mono_i16(int16_t *dst, const int16_t *src, size_t count);
459 
460 /**
461  * Downmix pairs of interleaved stereo input float samples to mono output float samples
462  * by averaging the stereo pair together.
463  *
464  *  \param dst     Destination buffer
465  *  \param src     Source buffer
466  *  \param count   Number of stereo frames to downmix
467  *
468  * The destination and source buffers must be completely separate (non-overlapping),
469  * or they must both start at the same address.
470  */
471 void downmix_to_mono_float_from_stereo_float(float *dst, const float *src, size_t count);
472 
473 /**
474  * Upmix mono input float samples to pairs of interleaved stereo output float samples by
475  * duplicating.
476  *
477  *  \param dst     Destination buffer
478  *  \param src     Source buffer
479  *  \param count   Number of mono samples to upmix
480  *
481  * The destination and source buffers must either be completely separate (non-overlapping), or
482  * they must both start at the same address.  Partially overlapping buffers are not supported.
483  */
484 void upmix_to_stereo_float_from_mono_float(float *dst, const float *src, size_t count);
485 
486 /**
487  * \return the total number of non-zero 32-bit samples.
488  */
489 size_t nonZeroMono32(const int32_t *samples, size_t count);
490 
491 /**
492  * \return the total number of non-zero 16-bit samples.
493  */
494 size_t nonZeroMono16(const int16_t *samples, size_t count);
495 
496 /**
497  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
498  * if either of its constituent 32-bit samples is non-zero.
499  */
500 size_t nonZeroStereo32(const int32_t *frames, size_t count);
501 
502 /**
503  * \return the total number of non-zero stereo frames, where a frame is considered non-zero
504  * if either of its constituent 16-bit samples is non-zero.
505  */
506 size_t nonZeroStereo16(const int16_t *frames, size_t count);
507 
508 /**
509  * Copy frames, selecting source samples based on a source channel mask to fit
510  * the destination channel mask. Unmatched channels in the destination channel mask
511  * are zero filled. Unmatched channels in the source channel mask are dropped.
512  * Channels present in the channel mask are represented by set bits in the
513  * uint32_t value and are matched without further interpretation.
514  *
515  *  \param dst         Destination buffer
516  *  \param dst_mask    Bit mask corresponding to destination channels present
517  *  \param src         Source buffer
518  *  \param src_mask    Bit mask corresponding to source channels present
519  *  \param sample_size Size of each sample in bytes.  Must be 1, 2, 3, or 4.
520  *  \param count       Number of frames to copy
521  *
522  * The destination and source buffers must be completely separate (non-overlapping).
523  * If the sample size is not in range, the function will abort.
524  */
525 void memcpy_by_channel_mask(void *dst, uint32_t dst_mask,
526         const void *src, uint32_t src_mask, size_t sample_size, size_t count);
527 
528 /**
529  * Copy frames, selecting source samples based on an index array (idxary).
530  * The idxary[] consists of dst_channels number of elements.
531  * The ith element of idxary[] corresponds to the ith destination channel.
532  * A non-negative value is the channel index in the source frame.
533  * A negative index (-1) represents filling with 0.
534  *
535  * Example: Swapping L and R channels for stereo streams
536  * <PRE>
537  * idxary[0] = 1;
538  * idxary[1] = 0;
539  * </PRE>
540  *
541  * Example: Copying a mono source to the front center 5.1 channel
542  * <PRE>
543  * idxary[0] = -1;
544  * idxary[1] = -1;
545  * idxary[2] = 0;
546  * idxary[3] = -1;
547  * idxary[4] = -1;
548  * idxary[5] = -1;
549  * </PRE>
550  *
551  * This copy allows swizzling of channels or replication of channels.
552  *
553  *  \param dst           Destination buffer
554  *  \param dst_channels  Number of destination channels per frame
555  *  \param src           Source buffer
556  *  \param src_channels  Number of source channels per frame
557  *  \param idxary        Array of indices representing channels in the source frame
558  *  \param sample_size   Size of each sample in bytes.  Must be 1, 2, 3, or 4.
559  *  \param count         Number of frames to copy
560  *
561  * The destination and source buffers must be completely separate (non-overlapping).
562  * If the sample size is not in range, the function will abort.
563  */
564 void memcpy_by_index_array(void *dst, uint32_t dst_channels,
565         const void *src, uint32_t src_channels,
566         const int8_t *idxary, size_t sample_size, size_t count);
567 
568 /**
569  * Prepares an index array (idxary) from channel masks, which can be later
570  * used by memcpy_by_index_array().
571  *
572  * \return the number of array elements required.
573  * This may be greater than idxcount, so the return value should be checked
574  * if idxary size is less than 32.
575  *
576  * Note that idxary is a caller allocated array
577  * of at least as many channels as present in the dst_mask.
578  * Channels present in the channel mask are represented by set bits in the
579  * uint32_t value and are matched without further interpretation.
580  *
581  * This function is typically used for converting audio data with different
582  * channel position masks.
583  *
584  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
585  *  \param idxcount    Number of caller allocated elements in idxary
586  *  \param dst_mask    Bit mask corresponding to destination channels present
587  *  \param src_mask    Bit mask corresponding to source channels present
588  */
589 size_t memcpy_by_index_array_initialization(int8_t *idxary, size_t idxcount,
590         uint32_t dst_mask, uint32_t src_mask);
591 
592 /**
593  * Prepares an index array (idxary) from channel masks, which can be later
594  * used by memcpy_by_index_array().
595  *
596  * \return the number of array elements required.
597  *
598  * For a source channel index mask, the source channels will map to the destination
599  * channels as if counting the set bits in dst_mask in order from lsb to msb
600  * (zero bits are ignored). The ith bit of the src_mask corresponds to the
601  * ith SET bit of dst_mask and the ith destination channel.  Hence, a zero ith
602  * bit of the src_mask indicates that the ith destination channel plays silence.
603  *
604  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
605  *  \param idxcount    Number of caller allocated elements in idxary
606  *  \param dst_mask    Bit mask corresponding to destination channels present
607  *  \param src_mask    Bit mask corresponding to source channels present
608  */
609 size_t memcpy_by_index_array_initialization_src_index(int8_t *idxary, size_t idxcount,
610         uint32_t dst_mask, uint32_t src_mask);
611 
612 /**
613  * Prepares an index array (idxary) from channel mask bits, which can be later
614  * used by memcpy_by_index_array().
615  *
616  * \return the number of array elements required.
617  *
618  * This initialization is for a destination channel index mask from a positional
619  * source mask.
620  *
621  * For a destination channel index mask, the input channels will map
622  * to the destination channels, with the ith SET bit in the source bits corresponding
623  * to the ith bit in the destination bits. If there is a zero bit in the middle
624  * of set destination bits (unlikely), the corresponding source channel will
625  * be dropped.
626  *
627  *  \param idxary      Updated array of indices of channels in the src frame for the dst frame
628  *  \param idxcount    Number of caller allocated elements in idxary
629  *  \param dst_mask    Bit mask corresponding to destination channels present
630  *  \param src_mask    Bit mask corresponding to source channels present
631  */
632 size_t memcpy_by_index_array_initialization_dst_index(int8_t *idxary, size_t idxcount,
633         uint32_t dst_mask, uint32_t src_mask);
634 
635 /**
636  * Add and clamp signed 16-bit samples.
637  *
638  *  \param dst     Destination buffer
639  *  \param src     Source buffer
640  *  \param count   Number of samples to add
641  *
642  * The destination and source buffers must either be completely separate (non-overlapping), or
643  * they must both start at the same address.  Partially overlapping buffers are not supported.
644  */
645 void accumulate_i16(int16_t *dst, const int16_t *src, size_t count);
646 
647 /**
648  * Add and clamp unsigned 8-bit samples.
649  *
650  *  \param dst     Destination buffer
651  *  \param src     Source buffer
652  *  \param count   Number of samples to add
653  *
654  * The destination and source buffers must either be completely separate (non-overlapping), or
655  * they must both start at the same address.  Partially overlapping buffers are not supported.
656  */
657 void accumulate_u8(uint8_t *dst, const uint8_t *src, size_t count);
658 
659 /**
660  * Add and clamp packed 24-bit Q0.23 samples.
661  *
662  *  \param dst     Destination buffer
663  *  \param src     Source buffer
664  *  \param count   Number of samples to add
665  *
666  * The destination and source buffers must either be completely separate (non-overlapping), or
667  * they must both start at the same address.  Partially overlapping buffers are not supported.
668  */
669 void accumulate_p24(uint8_t *dst, const uint8_t *src, size_t count);
670 
671 /**
672  * Add and clamp 32-bit Q8.23 samples.
673  *
674  *  \param dst     Destination buffer
675  *  \param src     Source buffer
676  *  \param count   Number of samples to add
677  *
678  * The destination and source buffers must either be completely separate (non-overlapping), or
679  * they must both start at the same address.  Partially overlapping buffers are not supported.
680  */
681 void accumulate_q8_23(int32_t *dst, const int32_t *src, size_t count);
682 
683 /**
684  * Add and clamp signed 32-bit Q0.31 samples.
685  *
686  *  \param dst     Destination buffer
687  *  \param src     Source buffer
688  *  \param count   Number of samples to add
689  *
690  * The destination and source buffers must either be completely separate (non-overlapping), or
691  * they must both start at the same address.  Partially overlapping buffers are not supported.
692  */
693 void accumulate_i32(int32_t *dst, const int32_t *src, size_t count);
694 
695 /**
696  * Add float samples. Result is not clamped.
697  *
698  *  \param dst     Destination buffer
699  *  \param src     Source buffer
700  *  \param count   Number of samples to add
701  *
702  * The destination and source buffers must either be completely separate (non-overlapping), or
703  * they must both start at the same address.  Partially overlapping buffers are not supported.
704  */
705 void accumulate_float(float *dst, const float *src, size_t count);
706 
/**
 * Saturate (hard limit / clip) a signed 32-bit sample to the signed 16-bit range.
 *
 *  \param sample  Value to limit
 *
 *  \return sample if it lies in [-32768, 32767], otherwise the nearest bound.
 */
static inline int16_t clamp16(int32_t sample)
{
    if (sample > INT16_MAX) {
        return INT16_MAX;
    }
    if (sample < INT16_MIN) {
        return INT16_MIN;
    }
    return (int16_t)sample;
}
716 
/**
 * Saturate (hard limit / clip) a signed 64-bit sample to the signed 32-bit range.
 *
 *  \param sample  Value to limit
 *
 *  \return sample if it lies in [-2147483648, 2147483647], otherwise the nearest bound.
 */
static inline int32_t clamp32(int64_t sample)
{
    if (sample > INT32_MAX) {
        return INT32_MAX;
    }
    if (sample < INT32_MIN) {
        return INT32_MIN;
    }
    return (int32_t)sample;
}
726 
727 /**
728  * Convert a IEEE 754 single precision float [-1.0, 1.0) to int16_t [-32768, 32767]
729  * with clamping.  Note the open bound at 1.0, values within 1/65536 of 1.0 map
730  * to 32767 instead of 32768 (early clamping due to the smaller positive integer subrange).
731  *
732  * Values outside the range [-1.0, 1.0) are properly clamped to -32768 and 32767,
733  * including -Inf and +Inf. NaN will generally be treated either as -32768 or 32767,
734  * depending on the sign bit inside NaN (whose representation is not unique).
735  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
736  *
737  * OLD code disabled: Rounding of 0.5 lsb is to even (default for IEEE 754).
738  * NEW code enabled: Rounding of 0.5 lsb is away from 0.
739  */
clamp16_from_float(float f)740 static inline int16_t clamp16_from_float(float f)
741 {
742 #if 0
743     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
744      * floating point significand. The normal shift is 3<<22, but the -15 offset
745      * is used to multiply by 32768.
746      */
747     static const float offset = (float)(3 << (22 - 15));
748     /* zero = (0x10f << 22) =  0x43c00000 (not directly used) */
749     static const int32_t limneg = (0x10f << 22) /*zero*/ - 32768; /* 0x43bf8000 */
750     static const int32_t limpos = (0x10f << 22) /*zero*/ + 32767; /* 0x43c07fff */
751 
752     union {
753         float f;
754         int32_t i;
755     } u;
756 
757     u.f = f + offset; /* recenter valid range */
758     /* Now the valid range is represented as integers between [limneg, limpos].
759      * Clamp using the fact that float representation (as an integer) is an ordered set.
760      */
761     if (u.i < limneg)
762         u.i = -32768;
763     else if (u.i > limpos)
764         u.i = 32767;
765     return u.i; /* Return lower 16 bits, the part of interest in the significand. */
766 #else
767     static const float scale = 1 << 15;
768     return roundf(fmaxf(fminf(f * scale, scale - 1.f), -scale));
769 #endif
770 }
771 
772 /**
773  * Convert a IEEE 754 single precision float [-1.0, 1.0) to uint8_t [0, 0xff]
774  * with clamping.  Note the open bound at 1.0, values within 1/128 of 1.0 map
775  * to 255 instead of 256 (early clamping due to the smaller positive integer subrange).
776  *
777  * Values outside the range [-1.0, 1.0) are properly clamped to 0 and 255,
778  * including -Inf and +Inf. NaN will generally be treated either as 0 or 255,
779  * depending on the sign bit inside NaN (whose representation is not unique).
780  * Nevertheless, strictly speaking, NaN behavior should be considered undefined.
781  *
782  * OLD code disabled: Rounding of 0.5 lsb is to even (default for IEEE 754).
783  * NEW code enabled: Rounding of 0.5 lsb is away from 0.
784  */
clamp8_from_float(float f)785 static inline uint8_t clamp8_from_float(float f)
786 {
787 #if 0
788     /* Offset is used to expand the valid range of [-1.0, 1.0) into the 16 lsbs of the
789      * floating point significand. The normal shift is 3<<22, but the -7 offset
790      * is used to multiply by 128.
791      */
792     static const float offset = (float)((3 << (22 - 7)) + 1 /* to cancel -1.0 */);
793     /* zero = (0x11f << 22) =  0x47c00000 */
794     static const int32_t limneg = (0x11f << 22) /*zero*/;
795     static const int32_t limpos = (0x11f << 22) /*zero*/ + 255; /* 0x47c000ff */
796 
797     union {
798         float f;
799         int32_t i;
800     } u;
801 
802     u.f = f + offset; /* recenter valid range */
803     /* Now the valid range is represented as integers between [limneg, limpos].
804      * Clamp using the fact that float representation (as an integer) is an ordered set.
805      */
806     if (u.i < limneg)
807         return 0;
808     if (u.i > limpos)
809         return 255;
810     return u.i; /* Return lower 8 bits, the part of interest in the significand. */
811 #else
812     return roundf(fmaxf(fminf(f * 128.f + 128.f, 255.f), 0.f));
813 #endif
814 }
815 
816 /**
817  * Convert a single-precision floating point value to a Q0.23 integer value, stored in a
818  * 32 bit signed integer (technically stored as Q8.23, but clamped to Q0.23).
819  *
820  * OLD code disabled: Rounds to nearest, ties away from 0.
821  * NEW code enabled: Rounding of 0.5 lsb is away from 0.
822  *
823  * Values outside the range [-1.0, 1.0) are properly clamped to -8388608 and 8388607,
824  * including -Inf and +Inf. NaN values are considered undefined, and behavior may change
825  * depending on hardware and future implementation of this function.
826  */
clamp24_from_float(float f)827 static inline int32_t clamp24_from_float(float f)
828 {
829 #if 0
830     static const float scale = (float)(1 << 23);
831     static const float limpos = 0x7fffff / scale;
832     static const float limneg = -0x800000 / scale;
833 
834     if (f <= limneg) {
835         return -0x800000;
836     } else if (f >= limpos) {
837         return 0x7fffff;
838     }
839     f *= scale;
840     /* integer conversion is through truncation (though int to float is not).
841      * ensure that we round to nearest, ties away from 0.
842      */
843     return f > 0 ? f + 0.5 : f - 0.5;
844 #else
845     static const float scale = 1 << 23;
846     return roundf(fmaxf(fminf(f * scale, scale - 1.f), -scale));
847 #endif
848 }
849 
/**
 * Limit a signed fixed-point 32-bit Q8.23 value to the Q0.23 range. The result is
 * still held in a 32-bit signed integer (Q8.23 storage, Q0.23 magnitude).
 *
 *  \param ival  Q8.23 value to limit
 *
 *  \return ival if it lies in [-0x800000, 0x7fffff], otherwise the nearest bound.
 */
static inline int32_t clamp24_from_q8_23(int32_t ival)
{
    const int32_t upper = 0x7fffff;
    const int32_t lower = -0x800000;
    const int32_t capped = (ival > upper) ? upper : ival;

    return (capped < lower) ? lower : capped;
}
868 
/**
 * Convert a single-precision floating point value to a Q4.27 integer value.
 * Rounding is to nearest, with ties rounded away from 0.
 *
 * Inputs at or below -16.0 return -2147483648 and inputs at or above 16.0 return
 * 2147483647; this covers -Inf and +Inf. NaN values are considered undefined, and
 * behavior may change depending on hardware and future implementation of this function.
 *
 *  \param f  Sample to convert
 *
 *  \return the rounded and clamped Q4.27 value.
 */
static inline int32_t clampq4_27_from_float(float f)
{
    const float scale = (float)(1UL << 27);

    /* Saturate up front; everything that remains scales to a value that fits int32_t. */
    if (f <= -16.0f) {
        return INT32_MIN;
    }
    if (f >= 16.0f) {
        return INT32_MAX;
    }

    const float scaled = f * scale;
    /* Float-to-integer conversion truncates toward zero, so push the value half an
     * lsb away from zero (in double precision) to obtain round-to-nearest.
     */
    const double biased = (scaled > 0) ? scaled + 0.5 : scaled - 0.5;

    return (int32_t)biased;
}
894 
/**
 * Convert a single-precision floating point value to a Q0.31 integer value.
 * Rounding is to nearest, with ties rounded away from 0.
 *
 * Inputs at or below -1.0 return -2147483648 and inputs at or above 1.0 return
 * 2147483647; this covers -Inf and +Inf. NaN values are considered undefined, and
 * behavior may change depending on hardware and future implementation of this function.
 *
 *  \param f  Sample to convert
 *
 *  \return the rounded and clamped Q0.31 value.
 */
static inline int32_t clamp32_from_float(float f)
{
    const float scale = (float)(1UL << 31);

    /* Saturate up front; everything that remains scales to a value that fits int32_t. */
    if (f <= -1.0f) {
        return INT32_MIN;
    }
    if (f >= 1.0f) {
        return INT32_MAX;
    }

    const float scaled = f * scale;
    /* Float-to-integer conversion truncates toward zero, so push the value half an
     * lsb away from zero (in double precision) to obtain round-to-nearest.
     */
    const double biased = (scaled > 0) ? scaled + 0.5 : scaled - 0.5;

    return (int32_t)biased;
}
920 
/**
 * Convert a signed fixed-point 32-bit Q4.27 value to single-precision floating-point.
 * The nominal output float range is [-1.0, 1.0] if the fixed-point range is
 * [0xf8000000, 0x07ffffff].  The full float range is [-16.0, 16.0].
 *
 * The upper bounds are closed because of rounding in the integer-to-float conversion:
 * an integer needing more than the 24-bit single-precision significand is rounded
 * (to nearest, ties to even, as per the IEEE 754 default), which can carry up to the
 * next power of two.
 *
 *  \param ival  Q4.27 value to convert
 *
 *  \return ival scaled by 2^-27.
 */
static inline float float_from_q4_27(int32_t ival)
{
    /* Reciprocal of 2^27 (27 fractional bits). A power-of-two factor makes the
     * multiplication exact; the only possible rounding is in the int-to-float
     * conversion of ival.
     */
    const float inv_scale = 1.0f / (float)(1UL << 27);
    const float as_float = (float)ival;

    return as_float * inv_scale;
}
944 
/**
 * Convert an unsigned fixed-point 32-bit U4.28 value to single-precision floating-point.
 * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 * [0x00000000, 0x10000000].  The full float range is [0.0, 16.0].
 *
 * The upper bound at 16.0 is closed because of rounding in the integer-to-float
 * conversion: an integer needing more than the 24-bit single-precision significand
 * is rounded (to nearest, ties to even, as per the IEEE 754 default), which can
 * carry up to the next power of two.
 *
 *  \param uval  U4.28 value to convert
 *
 *  \return uval scaled by 2^-28.
 */
static inline float float_from_u4_28(uint32_t uval)
{
    /* Reciprocal of 2^28 (28 fractional bits); a power of two, so the multiply is exact. */
    const float inv_scale = 1.0f / (float)(1UL << 28);
    const float as_float = (float)uval;

    return as_float * inv_scale;
}
961 
/**
 * Convert an unsigned fixed-point 16-bit U4.12 value to single-precision floating-point.
 * The nominal output float range is [0.0, 1.0] if the fixed-point range is
 * [0x0000, 0x1000].  The full float range is [0.0, 16.0).
 *
 *  \param uval  U4.12 value to convert
 *
 *  \return uval scaled by 2^-12.
 */
static inline float float_from_u4_12(uint16_t uval)
{
    /* Reciprocal of 2^12 (12 fractional bits); a power of two, so the multiply is exact. */
    const float inv_scale = 1.0f / (float)(1UL << 12);
    const float as_float = (float)uval;

    return as_float * inv_scale;
}
973 
/**
 * Convert a single-precision floating point value to a U4.28 integer value.
 * Rounding is to nearest, with ties rounded away from 0 (i.e. upwards, as the
 * converted range is non-negative).
 *
 * Inputs at or below 0 (including -Inf) return 0.  Inputs at or above the upper
 * limit (including +Inf) return 4294967295; the limit is 0xffffffff / 2^28 computed
 * in single precision.  NaN values are considered undefined, and behavior may change
 * depending on hardware and future implementation of this function.
 *
 *  \param f  Value to convert
 *
 *  \return the rounded and clamped U4.28 value.
 */
static inline uint32_t u4_28_from_float(float f)
{
    /* Both initializers are arithmetic constant expressions: an object with static
     * storage duration may not be initialized from another object in ISO C.
     */
    static const float scale = (float)(1 << 28);
    static const float limpos = (float)0xffffffffUL / (float)(1 << 28);

    if (f <= 0.f) {
        return 0;
    }
    if (f >= limpos) {
        return 0xffffffff;
    }
    /* Float-to-integer conversion truncates, so add half an lsb (in double
     * precision) to round to nearest.
     */
    return (uint32_t)((double)(f * scale) + 0.5);
}
997 
/**
 * Convert a single-precision floating point value to a U4.12 integer value.
 * Rounding is to nearest, with ties rounded away from 0 (i.e. upwards, as the
 * converted range is non-negative).
 *
 * Inputs at or below 0 (including -Inf) return 0.  Inputs at or above
 * 65535 / 4096 (including +Inf) return 65535.  NaN values are considered undefined,
 * and behavior may change depending on hardware and future implementation of this
 * function.
 *
 *  \param f  Value to convert
 *
 *  \return the rounded and clamped U4.12 value.
 */
static inline uint16_t u4_12_from_float(float f)
{
    /* Both initializers are arithmetic constant expressions: an object with static
     * storage duration may not be initialized from another object in ISO C.
     */
    static const float scale = (float)(1 << 12);
    static const float limpos = (float)0xffff / (float)(1 << 12);

    if (f <= 0.f) {
        return 0;
    }
    if (f >= limpos) {
        return 0xffff;
    }
    /* Float-to-integer conversion truncates, so add half an lsb (in double
     * precision) to round to nearest.
     */
    return (uint16_t)((double)(f * scale) + 0.5);
}
1021 
/**
 * Convert a signed fixed-point 16-bit Q0.15 value to single-precision floating-point.
 * The output float range is [-1.0, 1.0) for the fixed-point range
 * [0x8000, 0x7fff].
 *
 * There is no rounding, the conversion and representation is exact.
 *
 *  \param ival  Q0.15 sample to convert
 *
 *  \return ival scaled by 2^-15.
 */
static inline float float_from_i16(int16_t ival)
{
    /* Reciprocal of the nominal 16-bit half-sided range (32768). A power-of-two
     * factor makes the multiplication exact - the bit pattern is preserved.
     */
    const float inv_scale = 1.0f / (float)(1UL << 15);
    const float as_float = (float)ival;

    return as_float * inv_scale;
}
1041 
/**
 * Convert an unsigned 8-bit sample to single-precision floating-point.
 * The input is offset by 128 and then scaled by 2^-7, so 0x80 maps to 0.0 and
 * the output float range is [-1.0, 1.0) for the input range [0x00, 0xff].
 *
 *  \param uval  Unsigned 8-bit sample to convert
 *
 *  \return (uval - 128) scaled by 2^-7.
 */
static inline float float_from_u8(uint8_t uval)
{
    /* Reciprocal of 2^7; a power of two, so the multiply is exact. */
    const float inv_scale = 1.0f / (float)(1UL << 7);
    /* Remove the offset in signed arithmetic before converting. */
    const int centered = (int)uval - 128;

    return (float)centered * inv_scale;
}
1053 
/**
 * Convert a packed 24bit Q0.23 value stored in a uint8_t ptr to a signed
 * fixed-point 32 bit integer Q0.31 value.
 *
 * The three bytes are read with packed24[0] as the least significant byte and
 * packed24[2] as the most significant byte (which carries the sign), and are placed
 * in the upper 24 bits of the result; the low 8 bits of the result are zero.
 * NOTE(review): this byte order equals the host's native order only on
 * little-endian targets - confirm before using on a big-endian host.
 *
 * The output Q0.31 range is [0x80000000, 0x7fffff00] for the fixed-point range
 * [0x800000, 0x7fffff]. Even though the output range is limited on the positive
 * side, there is no DC offset on the output, if the input has no DC offset.
 *
 * Avoid relying on the limited output range, as future implementations may go
 * to full range.
 *
 *  \param packed24  Pointer to the 3 bytes of the packed sample
 *
 *  \return the sample as a Q0.31 value.
 */
static inline int32_t i32_from_p24(const uint8_t *packed24)
{
    /* Assemble in unsigned arithmetic: a uint8_t promotes to int, and shifting a
     * byte >= 0x80 left by 24 would shift into the sign bit of that int, which is
     * undefined behavior.
     */
    const uint32_t bits = ((uint32_t)packed24[0] << 8)
                        | ((uint32_t)packed24[1] << 16)
                        | ((uint32_t)packed24[2] << 24);

    return (int32_t)bits;
}
1069 
/**
 * Convert a 32-bit Q0.31 value to single-precision floating-point.
 * The output float range is [-1.0, 1.0] for the fixed-point range
 * [0x80000000, 0x7fffffff].
 *
 * Rounding may occur in the least significant 8 bits for large fixed point
 * values due to storage into the 24-bit floating-point significand.
 * Rounding will be to nearest, ties to even.
 *
 *  \param ival  Q0.31 sample to convert
 *
 *  \return ival scaled by 2^-31.
 */
static inline float float_from_i32(int32_t ival)
{
    /* Reciprocal of 2^31; a power of two, so the multiply itself is exact. */
    const float inv_scale = 1.0f / (float)(1UL << 31);
    const float as_float = (float)ival;

    return as_float * inv_scale;
}
1085 
/**
 * Convert a packed 24bit Q0.23 value stored in a uint8_t ptr to single-precision
 * floating-point. The output float range is [-1.0, 1.0) for the fixed-point
 * range [0x800000, 0x7fffff].
 *
 * The bytes are first widened with i32_from_p24() and the result is then scaled
 * with float_from_i32().  There is no rounding, the conversion and representation
 * is exact.
 *
 *  \param packed24  Pointer to the 3 bytes of the packed sample
 *
 *  \return the sample as a float.
 */
static inline float float_from_p24(const uint8_t *packed24)
{
    const int32_t q0_31 = i32_from_p24(packed24);

    return float_from_i32(q0_31);
}
1097 
/**
 * Convert a signed fixed-point 32-bit Q8.23 value to single-precision floating-point.
 * The nominal output float range is [-1.0, 1.0) for the fixed-point
 * range [0xff800000, 0x007fffff].  The maximum float range is [-256.0, 256.0).
 *
 * There is no rounding in the nominal range, the conversion and representation
 * is exact. For values outside the nominal range, rounding is to nearest, ties to even.
 *
 *  \param ival  Q8.23 value to convert
 *
 *  \return ival scaled by 2^-23.
 */
static inline float float_from_q8_23(int32_t ival)
{
    /* Reciprocal of 2^23 (23 fractional bits); a power of two, so the multiply is exact. */
    const float inv_scale = 1.0f / (float)(1UL << 23);
    const float as_float = (float)ival;

    return as_float * inv_scale;
}
1112 
/**
 * Multiply-accumulate 16-bit terms with 32-bit result: return a + in*v.
 *
 * The product of two 16-bit terms always fits in 32 bits.  The accumulation is not
 * saturated: in the portable implementation it wraps modulo 2^32 on overflow.
 *
 *  \param in  First 16-bit factor
 *  \param v   Second 16-bit factor
 *  \param a   32-bit accumulator value to add to the product
 *
 *  \return a + in*v.
 */
static inline
int32_t mulAdd(int16_t in, int16_t v, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    asm( "smlabb %[out], %[in], %[v], %[a] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    const int32_t product = (int32_t)in * (int32_t)v;

    /* Add in unsigned arithmetic: signed overflow is undefined behavior in C,
     * whereas unsigned addition wraps.
     */
    return (int32_t)((uint32_t)a + (uint32_t)product);
#endif
}
1130 
/**
 * Multiply 16-bit terms with 32-bit result: return in*v.
 *
 * The product of two 16-bit terms always fits in 32 bits.
 *
 *  \param in  First 16-bit factor
 *  \param v   Second 16-bit factor
 *
 *  \return in*v.
 */
static inline
int32_t mul(int16_t in, int16_t v)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    asm( "smulbb %[out], %[in], %[v] \n"
         : [out]"=r"(out)
         : [in]"%r"(in), [v]"r"(v)
         : );
    return out;
#else
    const int32_t lhs = in;
    const int32_t rhs = v;

    return lhs * rhs;
#endif
}
1148 
/**
 * Similar to mulAdd, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 * When left is non-zero the low 16 bits of inRL and vRL are multiplied; otherwise
 * the high 16 bits are multiplied.  Each 16-bit term is treated as signed.  The
 * accumulation is not saturated: in the portable implementation it wraps modulo
 * 2^32 on overflow.
 *
 *  \param left  Non-zero to use the low halfwords, zero to use the high halfwords
 *  \param inRL  First packed pair of 16-bit terms
 *  \param vRL   Second packed pair of 16-bit terms
 *  \param a     32-bit accumulator value to add to the product
 *
 *  \return a plus the product of the selected halfwords.
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, uint32_t vRL, int32_t a)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    if (left) {
        asm( "smlabb %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    } else {
        asm( "smlatt %[out], %[inRL], %[vRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL), [a]"r"(a)
             : );
    }
    return out;
#else
    const unsigned shift = left ? 0u : 16u;
    const int16_t in16 = (int16_t)((inRL >> shift) & 0xFFFF);
    const int16_t v16 = (int16_t)((vRL >> shift) & 0xFFFF);
    const int32_t product = (int32_t)in16 * (int32_t)v16;

    /* Add in unsigned arithmetic: signed overflow is undefined behavior in C,
     * whereas unsigned addition wraps.
     */
    return (int32_t)((uint32_t)a + (uint32_t)product);
#endif
}
1177 
/**
 * Similar to mul, but the 16-bit terms are extracted from a 32-bit interleaved stereo pair.
 *
 * When left is non-zero the low 16 bits of inRL and vRL are multiplied; otherwise
 * the high 16 bits are multiplied.  Each 16-bit term is treated as signed.
 *
 *  \param left  Non-zero to use the low halfwords, zero to use the high halfwords
 *  \param inRL  First packed pair of 16-bit terms
 *  \param vRL   Second packed pair of 16-bit terms
 *
 *  \return the product of the selected halfwords.
 */
static inline
int32_t mulRL(int left, uint32_t inRL, uint32_t vRL)
{
#if defined(__arm__) && !defined(__thumb__)
    int32_t out;
    if (left) {
        asm( "smulbb %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    } else {
        asm( "smultt %[out], %[inRL], %[vRL] \n"
             : [out]"=r"(out)
             : [inRL]"%r"(inRL), [vRL]"r"(vRL)
             : );
    }
    return out;
#else
    /* Pick the halfword once, then do a single signed 16x16 multiply. */
    const unsigned shift = left ? 0u : 16u;
    const int16_t in16 = (int16_t)((inRL >> shift) & 0xFFFF);
    const int16_t v16 = (int16_t)((vRL >> shift) & 0xFFFF);

    return in16 * v16;
#endif
}
1206 
1207 /** \cond */
1208 __END_DECLS
1209 /** \endcond */
1210 
1211 #endif  // ANDROID_AUDIO_PRIMITIVES_H
1212