• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2007 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "AudioResamplerSinc"
18 //#define LOG_NDEBUG 0
19 
20 #define __STDC_CONSTANT_MACROS
21 #include <malloc.h>
22 #include <pthread.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <dlfcn.h>
26 
27 #include <cutils/compiler.h>
28 #include <cutils/properties.h>
29 
30 #include <utils/Log.h>
31 #include <audio_utils/primitives.h>
32 
33 #include "AudioResamplerSinc.h"
34 
35 #if defined(__clang__) && !__has_builtin(__builtin_assume_aligned)
36 #define __builtin_assume_aligned(p, a) \
37 	(((uintptr_t(p) % (a)) == 0) ? (p) : (__builtin_unreachable(), (p)))
38 #endif
39 
40 #if defined(__arm__) && !defined(__thumb__)
41 #define USE_INLINE_ASSEMBLY (true)
42 #else
43 #define USE_INLINE_ASSEMBLY (false)
44 #endif
45 
46 #if defined(__aarch64__) || defined(__ARM_NEON__)
47 #ifndef USE_NEON
48 #define USE_NEON (true)
49 #endif
50 #else
51 #define USE_NEON (false)
52 #endif
53 #if USE_NEON
54 #include <arm_neon.h>
55 #endif
56 
57 #define UNUSED(x) ((void)(x))
58 
59 namespace android {
60 // ----------------------------------------------------------------------------
61 
62 
/*
 * These coefficients are computed with the "fir" utility found in
 * tools/resampler_tools
 * cmd-line: fir -l 7 -s 48000 -c 20478
 */
68 const uint32_t AudioResamplerSinc::mFirCoefsUp[] __attribute__ ((aligned (32))) = {
69 #include "AudioResamplerSincUp.h"
70 };
71 
72 /*
73  * These coefficients are optimized for 48KHz -> 44.1KHz
74  * cmd-line: fir -l 7 -s 48000 -c 17189
75  */
76 const uint32_t AudioResamplerSinc::mFirCoefsDown[] __attribute__ ((aligned (32))) = {
77 #include "AudioResamplerSincDown.h"
78 };
79 
// Number of phase bits used to linearly interpolate between two adjacent
// coefficient phases. This value cannot change: the fixed-point multiplies
// further down in this file rely on it being 15.
static const int pLerpBits = 15;

// Guards the one-time execution of AudioResamplerSinc::init_routine(); it is
// passed to pthread_once() from the constructor.
static pthread_once_t once_control = PTHREAD_ONCE_INIT;
// Coefficient reader resolved with dlsym() in init_routine(). It stays NULL
// when the library or any of its symbols is missing, in which case resample()
// falls back to the built-in coefficient tables.
static readCoefficientsFn readResampleCoefficients = NULL;

// Filter parameters for each quality level. Both are filled in by
// init_routine(): the first from compile-time constants, the second from the
// values reported by the dynamically loaded library (or a copy of the first
// when that library is unavailable).
/*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::highQualityConstants;
/*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::veryHighQualityConstants;
89 
init_routine()90 void AudioResamplerSinc::init_routine()
91 {
92     // for high quality resampler, the parameters for coefficients are compile-time constants
93     Constants *c = &highQualityConstants;
94     c->coefsBits = RESAMPLE_FIR_LERP_INT_BITS;
95     c->cShift = kNumPhaseBits - c->coefsBits;
96     c->cMask = ((1<< c->coefsBits)-1) << c->cShift;
97     c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
98     c->pMask = ((1<< pLerpBits)-1) << c->pShift;
99     c->halfNumCoefs = RESAMPLE_FIR_NUM_COEF;
100 
101     // for very high quality resampler, the parameters are load-time constants
102     veryHighQualityConstants = highQualityConstants;
103 
104     // Open the dll to get the coefficients for VERY_HIGH_QUALITY
105     void *resampleCoeffLib = dlopen("libaudio-resampler.so", RTLD_NOW);
106     ALOGV("Open libaudio-resampler library = %p", resampleCoeffLib);
107     if (resampleCoeffLib == NULL) {
108         ALOGE("Could not open audio-resampler library: %s", dlerror());
109         return;
110     }
111 
112     readResampleFirNumCoeffFn readResampleFirNumCoeff;
113     readResampleFirLerpIntBitsFn readResampleFirLerpIntBits;
114 
115     readResampleCoefficients = (readCoefficientsFn)
116             dlsym(resampleCoeffLib, "readResamplerCoefficients");
117     readResampleFirNumCoeff = (readResampleFirNumCoeffFn)
118             dlsym(resampleCoeffLib, "readResampleFirNumCoeff");
119     readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn)
120             dlsym(resampleCoeffLib, "readResampleFirLerpIntBits");
121 
122     if (!readResampleCoefficients || !readResampleFirNumCoeff || !readResampleFirLerpIntBits) {
123         readResampleCoefficients = NULL;
124         dlclose(resampleCoeffLib);
125         resampleCoeffLib = NULL;
126         ALOGE("Could not find symbol: %s", dlerror());
127         return;
128     }
129 
130     c = &veryHighQualityConstants;
131     c->coefsBits = readResampleFirLerpIntBits();
132     c->cShift = kNumPhaseBits - c->coefsBits;
133     c->cMask = ((1<<c->coefsBits)-1) << c->cShift;
134     c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits;
135     c->pMask = ((1<<pLerpBits)-1) << c->pShift;
136     // number of zero-crossing on each side
137     c->halfNumCoefs = readResampleFirNumCoeff();
138     ALOGV("coefsBits = %d", c->coefsBits);
139     ALOGV("halfNumCoefs = %d", c->halfNumCoefs);
140     // note that we "leak" resampleCoeffLib until the process exits
141 }
142 
143 // ----------------------------------------------------------------------------
144 
145 #if !USE_NEON
146 
/*
 * Scales a 32-bit value by one 16-bit half of a packed volume word.
 *
 * left: non-zero selects the low halfword of vRL, zero the high halfword.
 * in:   value to scale.
 * vRL:  two signed 16-bit volumes packed into one word.
 *
 * The portable path returns (in * volume) >> 16 computed in 64 bits.
 * The ARM path uses SMULTB / SMULTT. Those instructions treat their two
 * source registers differently, so the inputs must not carry the '%'
 * (commutative) constraint modifier: it would allow the compiler to swap them.
 */
static inline
int32_t mulRL(int left, int32_t in, uint32_t vRL)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    if (left) {
        asm( "smultb %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL));
    } else {
        asm( "smultt %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL));
    }
    return out;
#else
    const int16_t volume = left ? int16_t(vRL) : int16_t(vRL>>16);
    return int32_t((int64_t(in) * volume) >> 16);
#endif
}
169 
/*
 * Fixed-point multiply-accumulate: returns a + ((v * in) >> 16).
 *
 * in: signed 16-bit factor.
 * v:  signed 32-bit factor.
 * a:  accumulator.
 *
 * The ARM path uses SMLAWB, whose two multiplicand registers are not
 * interchangeable (one is used as 32 bits, the other as a halfword), so the
 * inputs are declared without the '%' (commutative) constraint modifier.
 */
static inline
int32_t mulAdd(int16_t in, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    asm( "smlawb %[out], %[v], %[in], %[a] \n"
         : [out]"=r"(out)
         : [in]"r"(in), [v]"r"(v), [a]"r"(a));
    return out;
#else
    return a + int32_t((int64_t(v) * in) >> 16);
#endif
}
184 
/*
 * Fixed-point multiply-accumulate against one half of a packed sample pair:
 * returns a + ((v * sample) >> 16).
 *
 * left: non-zero selects the low halfword of inRL, zero the high halfword.
 * inRL: two signed 16-bit samples packed into one word.
 * v:    signed 32-bit factor.
 * a:    accumulator.
 *
 * The ARM path uses SMLAWB / SMLAWT, whose two multiplicand registers are not
 * interchangeable, so the inputs are declared without the '%' (commutative)
 * constraint modifier.
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    if (left) {
        asm( "smlawb %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a));
    } else {
        asm( "smlawt %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a));
    }
    return out;
#else
    const int16_t sample = left ? int16_t(inRL) : int16_t(inRL>>16);
    return a + int32_t((int64_t(v) * sample) >> 16);
#endif
}
207 
208 #endif // !USE_NEON
209 
210 // ----------------------------------------------------------------------------
211 
AudioResamplerSinc(int inChannelCount,int32_t sampleRate,src_quality quality)212 AudioResamplerSinc::AudioResamplerSinc(
213         int inChannelCount, int32_t sampleRate, src_quality quality)
214     : AudioResampler(inChannelCount, sampleRate, quality),
215     mState(0), mImpulse(0), mRingFull(0), mFirCoefs(0)
216 {
217     /*
218      * Layout of the state buffer for 32 tap:
219      *
220      * "present" sample            beginning of 2nd buffer
221      *                 v                v
222      *  0              01               2              23              3
223      *  0              F0               0              F0              F
224      * [pppppppppppppppInnnnnnnnnnnnnnnnpppppppppppppppInnnnnnnnnnnnnnnn]
225      *                 ^               ^ head
226      *
227      * p = past samples, convoluted with the (p)ositive side of sinc()
228      * n = future samples, convoluted with the (n)egative side of sinc()
229      * r = extra space for implementing the ring buffer
230      *
231      */
232 
233     mVolumeSIMD[0] = 0;
234     mVolumeSIMD[1] = 0;
235 
236     // Load the constants for coefficients
237     int ok = pthread_once(&once_control, init_routine);
238     if (ok != 0) {
239         ALOGE("%s pthread_once failed: %d", __func__, ok);
240     }
241     mConstants = (quality == VERY_HIGH_QUALITY) ?
242             &veryHighQualityConstants : &highQualityConstants;
243 }
244 
245 
~AudioResamplerSinc()246 AudioResamplerSinc::~AudioResamplerSinc() {
247     free(mState);
248 }
249 
init()250 void AudioResamplerSinc::init() {
251     const Constants& c(*mConstants);
252     const size_t numCoefs = 2 * c.halfNumCoefs;
253     const size_t stateSize = numCoefs * mChannelCount * 2;
254     mState = (int16_t*)memalign(32, stateSize*sizeof(int16_t));
255     memset(mState, 0, sizeof(int16_t)*stateSize);
256     mImpulse  = mState   + (c.halfNumCoefs-1)*mChannelCount;
257     mRingFull = mImpulse + (numCoefs+1)*mChannelCount;
258 }
259 
setVolume(float left,float right)260 void AudioResamplerSinc::setVolume(float left, float right) {
261     AudioResampler::setVolume(left, right);
262     // convert to U4_28 (rounding down).
263     // integer volume values are clamped to 0 to UNITY_GAIN.
264     mVolumeSIMD[0] = u4_28_from_float(clampFloatVol(left));
265     mVolumeSIMD[1] = u4_28_from_float(clampFloatVol(right));
266 }
267 
resample(int32_t * out,size_t outFrameCount,AudioBufferProvider * provider)268 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
269             AudioBufferProvider* provider)
270 {
271     // FIXME store current state (up or down sample) and only load the coefs when the state
272     // changes. Or load two pointers one for up and one for down in the init function.
273     // Not critical now since the read functions are fast, but would be important if read was slow.
274     if (mConstants == &veryHighQualityConstants && readResampleCoefficients) {
275         mFirCoefs = readResampleCoefficients( mInSampleRate <= mSampleRate );
276     } else {
277         mFirCoefs = (const int32_t *)
278                 ((mInSampleRate <= mSampleRate) ? mFirCoefsUp : mFirCoefsDown);
279     }
280 
281     // select the appropriate resampler
282     switch (mChannelCount) {
283     case 1:
284         return resample<1>(out, outFrameCount, provider);
285     case 2:
286         return resample<2>(out, outFrameCount, provider);
287     default:
288         LOG_ALWAYS_FATAL("invalid channel count: %d", mChannelCount);
289         return 0;
290     }
291 }
292 
293 
294 template<int CHANNELS>
resample(int32_t * out,size_t outFrameCount,AudioBufferProvider * provider)295 size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount,
296         AudioBufferProvider* provider)
297 {
298     const Constants& c(*mConstants);
299     const size_t headOffset = c.halfNumCoefs*CHANNELS;
300     int16_t* impulse = mImpulse;
301     uint32_t vRL = mVolumeRL;
302     size_t inputIndex = mInputIndex;
303     uint32_t phaseFraction = mPhaseFraction;
304     uint32_t phaseIncrement = mPhaseIncrement;
305     size_t outputIndex = 0;
306     size_t outputSampleCount = outFrameCount * 2;
307     size_t inFrameCount = getInFrameCountRequired(outFrameCount);
308 
309     while (outputIndex < outputSampleCount) {
310         // buffer is empty, fetch a new one
311         while (mBuffer.frameCount == 0) {
312             mBuffer.frameCount = inFrameCount;
313             provider->getNextBuffer(&mBuffer);
314             if (mBuffer.raw == NULL) {
315                 goto resample_exit;
316             }
317             const uint32_t phaseIndex = phaseFraction >> kNumPhaseBits;
318             if (phaseIndex == 1) {
319                 // read one frame
320                 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
321             } else if (phaseIndex == 2) {
322                 // read 2 frames
323                 read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
324                 inputIndex++;
325                 if (inputIndex >= mBuffer.frameCount) {
326                     inputIndex -= mBuffer.frameCount;
327                     provider->releaseBuffer(&mBuffer);
328                 } else {
329                     read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex);
330                 }
331             }
332         }
333         int16_t const * const in = mBuffer.i16;
334         const size_t frameCount = mBuffer.frameCount;
335 
336         // Always read-in the first samples from the input buffer
337         int16_t* head = impulse + headOffset;
338         for (size_t i=0 ; i<CHANNELS ; i++) {
339             head[i] = in[inputIndex*CHANNELS + i];
340         }
341 
342         // handle boundary case
343         while (CC_LIKELY(outputIndex < outputSampleCount)) {
344             filterCoefficient<CHANNELS>(&out[outputIndex], phaseFraction, impulse, vRL);
345             outputIndex += 2;
346 
347             phaseFraction += phaseIncrement;
348             const size_t phaseIndex = phaseFraction >> kNumPhaseBits;
349             for (size_t i=0 ; i<phaseIndex ; i++) {
350                 inputIndex++;
351                 if (inputIndex >= frameCount) {
352                     goto done;  // need a new buffer
353                 }
354                 read<CHANNELS>(impulse, phaseFraction, in, inputIndex);
355             }
356         }
357 done:
358         // if done with buffer, save samples
359         if (inputIndex >= frameCount) {
360             inputIndex -= frameCount;
361             provider->releaseBuffer(&mBuffer);
362         }
363     }
364 
365 resample_exit:
366     mImpulse = impulse;
367     mInputIndex = inputIndex;
368     mPhaseFraction = phaseFraction;
369     return outputIndex / CHANNELS;
370 }
371 
372 template<int CHANNELS>
373 /***
374 * read()
375 *
376 * This function reads only one frame from input buffer and writes it in
377 * state buffer
378 *
379 **/
read(int16_t * & impulse,uint32_t & phaseFraction,const int16_t * in,size_t inputIndex)380 void AudioResamplerSinc::read(
381         int16_t*& impulse, uint32_t& phaseFraction,
382         const int16_t* in, size_t inputIndex)
383 {
384     impulse += CHANNELS;
385     phaseFraction -= 1LU<<kNumPhaseBits;
386 
387     const Constants& c(*mConstants);
388     if (CC_UNLIKELY(impulse >= mRingFull)) {
389         const size_t stateSize = (c.halfNumCoefs*2)*CHANNELS;
390         memcpy(mState, mState+stateSize, sizeof(int16_t)*stateSize);
391         impulse -= stateSize;
392     }
393 
394     int16_t* head = impulse + c.halfNumCoefs*CHANNELS;
395     for (size_t i=0 ; i<CHANNELS ; i++) {
396         head[i] = in[inputIndex*CHANNELS + i];
397     }
398 }
399 
/*
 * Computes one output frame and accumulates it into out[0] and out[1].
 *
 * out:     destination pair; the result is added to the existing contents.
 * phase:   current phase; its coefficient-index and interpolation fields are
 *          extracted with the masks/shifts in *mConstants.
 * samples: the "present" frame inside the state buffer. Frames at and before
 *          it are combined with the coefficients selected by `phase`, frames
 *          after it with the coefficients selected by ONE - phase.
 * vRL:     packed volume, used by the non-NEON path only; the NEON path reads
 *          mVolumeSIMD instead.
 */
template<int CHANNELS>
void AudioResamplerSinc::filterCoefficient(int32_t* out, uint32_t phase,
         const int16_t *samples, uint32_t vRL)
{
    // NOTE: be very careful when modifying the code here. register
    // pressure is very high and a small change might cause the compiler
    // to generate far less efficient code.
    // Always validate the result with objdump or test-resample.

    // compute the index of the coefficient on the positive side and
    // negative side
    const Constants& c(*mConstants);
    const int32_t ONE = c.cMask | c.pMask;
    uint32_t indexP = ( phase & c.cMask) >> c.cShift;
    uint32_t lerpP  = ( phase & c.pMask) >> c.pShift;
    uint32_t indexN = ((ONE-phase) & c.cMask) >> c.cShift;
    uint32_t lerpN  = ((ONE-phase) & c.pMask) >> c.pShift;

    // each phase occupies halfNumCoefs consecutive coefficients, so the phase
    // index is scaled into an element offset
    const size_t offset = c.halfNumCoefs;
    indexP *= offset;
    indexN *= offset;

    int32_t const* coefsP = mFirCoefs + indexP;
    int32_t const* coefsN = mFirCoefs + indexN;
    // sP walks backwards from the present frame, sN forwards from the next one
    int16_t const* sP = samples;
    int16_t const* sN = samples + CHANNELS;

    size_t count = offset;

#if !USE_NEON
    int32_t l = 0;
    int32_t r = 0;
    for (size_t i=0 ; i<count ; i++) {
        interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP);
        sP -= CHANNELS;
        interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN);
        sN += CHANNELS;
    }
    // apply the per-channel volume and accumulate into the output
    out[0] += 2 * mulRL(1, l, vRL);
    out[1] += 2 * mulRL(0, r, vRL);
#else
    UNUSED(vRL);
    if (CHANNELS == 1) {
        // coefficients of the following phase, used as interpolation target
        int32_t const* coefsP1 = coefsP + offset;
        int32_t const* coefsN1 = coefsN + offset;
        // back up three frames so that one 4-frame load ends at the present
        // frame; the loaded vector is reversed below
        sP -= CHANNELS*3;

        int32x4_t sum;
        int32x2_t lerpPN;
        // lane 0 = lerpP, lane 1 = lerpN, both shifted up by 16 bits
        lerpPN = vdup_n_s32(0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
        lerpPN = vshl_n_s32(lerpPN, 16);
        sum = vdupq_n_s32(0);

        int16x4_t sampleP, sampleN;
        int32x4_t samplePExt, sampleNExt;
        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;

        // NOTE(review): these promise 16-byte alignment to the compiler, which
        // presumably requires halfNumCoefs to be a multiple of 4 -- confirm
        // for library-provided values.
        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
        // four taps per side per iteration; count is unsigned, so the loop
        // only ends at zero if it started as a multiple of 4
        for (; count > 0; count -= 4) {
            sampleP = vld1_s16(sP);
            sampleN = vld1_s16(sN);
            coefsPV0 = vld1q_s32(coefsP);
            coefsNV0 = vld1q_s32(coefsN);
            coefsPV1 = vld1q_s32(coefsP1);
            coefsNV1 = vld1q_s32(coefsN1);
            sP -= 4;
            sN += 4;
            coefsP += 4;
            coefsN += 4;
            coefsP1 += 4;
            coefsN1 += 4;

            // past samples were loaded in ascending order; reverse them
            sampleP = vrev64_s16(sampleP);

            // interpolate (step1)
            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
            samplePExt = vshll_n_s16(sampleP, 15);
            // interpolate (step2)
            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
            sampleNExt = vshll_n_s16(sampleN, 15);
            // interpolate (step3)
            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);

            // multiply samples by the interpolated coefficients and accumulate
            samplePExt = vqrdmulhq_s32(samplePExt, coefsPV0);
            sampleNExt = vqrdmulhq_s32(sampleNExt, coefsNV0);
            sum = vaddq_s32(sum, samplePExt);
            sum = vaddq_s32(sum, sampleNExt);
        }
        int32x2_t volumesV, outV;
        volumesV = vld1_s32(mVolumeSIMD);
        outV = vld1_s32(out);

        // add all 4 partial sums; afterwards both lanes hold the total
        int32x2_t sumLow, sumHigh;
        sumLow = vget_low_s32(sum);
        sumHigh = vget_high_s32(sum);
        sumLow = vpadd_s32(sumLow, sumHigh);
        sumLow = vpadd_s32(sumLow, sumLow);

        // the same mono total is scaled by each channel's volume
        sumLow = vqrdmulh_s32(sumLow, volumesV);
        outV = vadd_s32(outV, sumLow);
        vst1_s32(out, outV);
    } else if (CHANNELS == 2) {
        // coefficients of the following phase, used as interpolation target
        int32_t const* coefsP1 = coefsP + offset;
        int32_t const* coefsN1 = coefsN + offset;
        // back up three frames so that one 4-frame load ends at the present
        // frame; the loaded vectors are reversed below
        sP -= CHANNELS*3;

        int32x4_t sum0, sum1;
        int32x2_t lerpPN;

        // lane 0 = lerpP, lane 1 = lerpN, both shifted up by 16 bits
        lerpPN = vdup_n_s32(0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0);
        lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1);
        lerpPN = vshl_n_s32(lerpPN, 16);
        sum0 = vdupq_n_s32(0);
        sum1 = vdupq_n_s32(0);

        int16x4x2_t sampleP, sampleN;
        int32x4x2_t samplePExt, sampleNExt;
        int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1;

        // NOTE(review): these promise 16-byte alignment to the compiler, which
        // presumably requires halfNumCoefs to be a multiple of 4 -- confirm
        // for library-provided values.
        coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16);
        coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16);
        coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16);
        coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16);
        // four taps per side per iteration; count is unsigned, so the loop
        // only ends at zero if it started as a multiple of 4
        for (; count > 0; count -= 4) {
            // de-interleaving loads: val[0] and val[1] each hold one channel
            sampleP = vld2_s16(sP);
            sampleN = vld2_s16(sN);
            coefsPV0 = vld1q_s32(coefsP);
            coefsNV0 = vld1q_s32(coefsN);
            coefsPV1 = vld1q_s32(coefsP1);
            coefsNV1 = vld1q_s32(coefsN1);
            sP -= 8;
            sN += 8;
            coefsP += 4;
            coefsN += 4;
            coefsP1 += 4;
            coefsN1 += 4;

            // past samples were loaded in ascending order; reverse them
            sampleP.val[0] = vrev64_s16(sampleP.val[0]);
            sampleP.val[1] = vrev64_s16(sampleP.val[1]);

            // interpolate (step1)
            coefsPV1 = vsubq_s32(coefsPV1, coefsPV0);
            coefsNV1 = vsubq_s32(coefsNV1, coefsNV0);
            samplePExt.val[0] = vshll_n_s16(sampleP.val[0], 15);
            samplePExt.val[1] = vshll_n_s16(sampleP.val[1], 15);
            // interpolate (step2)
            coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0);
            coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1);
            sampleNExt.val[0] = vshll_n_s16(sampleN.val[0], 15);
            sampleNExt.val[1] = vshll_n_s16(sampleN.val[1], 15);
            // interpolate (step3)
            coefsPV0 = vaddq_s32(coefsPV0, coefsPV1);
            coefsNV0 = vaddq_s32(coefsNV0, coefsNV1);

            // multiply samples by the interpolated coefficients and
            // accumulate, one accumulator per channel
            samplePExt.val[0] = vqrdmulhq_s32(samplePExt.val[0], coefsPV0);
            samplePExt.val[1] = vqrdmulhq_s32(samplePExt.val[1], coefsPV0);
            sampleNExt.val[0] = vqrdmulhq_s32(sampleNExt.val[0], coefsNV0);
            sampleNExt.val[1] = vqrdmulhq_s32(sampleNExt.val[1], coefsNV0);
            sum0 = vaddq_s32(sum0, samplePExt.val[0]);
            sum1 = vaddq_s32(sum1, samplePExt.val[1]);
            sum0 = vaddq_s32(sum0, sampleNExt.val[0]);
            sum1 = vaddq_s32(sum1, sampleNExt.val[1]);
        }
        int32x2_t volumesV, outV;
        volumesV = vld1_s32(mVolumeSIMD);
        outV = vld1_s32(out);

        // add all 4 partial sums of each channel
        int32x2_t sumLow0, sumHigh0, sumLow1, sumHigh1;
        sumLow0 = vget_low_s32(sum0);
        sumHigh0 = vget_high_s32(sum0);
        sumLow1 = vget_low_s32(sum1);
        sumHigh1 = vget_high_s32(sum1);
        sumLow0 = vpadd_s32(sumLow0, sumHigh0);
        sumLow0 = vpadd_s32(sumLow0, sumLow0);
        sumLow1 = vpadd_s32(sumLow1, sumHigh1);
        sumLow1 = vpadd_s32(sumLow1, sumLow1);

        // pack the two channel totals into one vector, apply the volumes and
        // accumulate into the output
        sumLow0 = vtrn_s32(sumLow0, sumLow1).val[0];
        sumLow0 = vqrdmulh_s32(sumLow0, volumesV);
        outV = vadd_s32(outV, sumLow0);
        vst1_s32(out, outV);
    }
#endif
}
595 
596 template<int CHANNELS>
interpolate(int32_t & l,int32_t & r,const int32_t * coefs,size_t offset,int32_t lerp,const int16_t * samples)597 void AudioResamplerSinc::interpolate(
598         int32_t& l, int32_t& r,
599         const int32_t* coefs, size_t offset,
600         int32_t lerp, const int16_t* samples)
601 {
602     int32_t c0 = coefs[0];
603     int32_t c1 = coefs[offset];
604     int32_t sinc = mulAdd(lerp, (c1-c0)<<1, c0);
605     if (CHANNELS == 2) {
606         uint32_t rl = *reinterpret_cast<const uint32_t*>(samples);
607         l = mulAddRL(1, rl, sinc, l);
608         r = mulAddRL(0, rl, sinc, r);
609     } else {
610         r = l = mulAdd(samples[0], sinc, l);
611     }
612 }
613 // ----------------------------------------------------------------------------
614 } // namespace android
615