• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 Google Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1.  Redistributions of source code must retain the above copyright
9  *     notice, this list of conditions and the following disclaimer.
10  * 2.  Redistributions in binary form must reproduce the above copyright
11  *     notice, this list of conditions and the following disclaimer in the
12  *     documentation and/or other materials provided with the distribution.
13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14  *     its contributors may be used to endorse or promote products derived
15  *     from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "config.h"
30 
31 #if ENABLE(WEB_AUDIO)
32 
33 #include "platform/audio/SincResampler.h"
34 
35 #include "platform/audio/AudioBus.h"
36 #include "wtf/CPU.h"
37 #include "wtf/MathExtras.h"
38 
39 #if CPU(X86) || CPU(X86_64)
40 #include <emmintrin.h>
41 #endif
42 
43 // Input buffer layout, dividing the total buffer into regions (r0 - r5):
44 //
45 // |----------------|----------------------------------------------------------------|----------------|
46 //
47 //                                              blockSize + kernelSize / 2
48 //                   <-------------------------------------------------------------------------------->
49 //                                                  r0
50 //
51 //   kernelSize / 2   kernelSize / 2                                 kernelSize / 2     kernelSize / 2
52 // <---------------> <--------------->                              <---------------> <--------------->
53 //         r1                r2                                             r3                r4
54 //
55 //                                              blockSize
56 //                                     <-------------------------------------------------------------->
57 //                                                  r5
58 
59 // The Algorithm:
60 //
61 // 1) Consume input frames into r0 (r1 is zero-initialized).
62 // 2) Position kernel centered at start of r0 (r2) and generate output frames until kernel is centered at start of r4.
63 //    or we've finished generating all the output frames.
64 // 3) Copy r3 to r1 and r4 to r2.
65 // 4) Consume input frames into r5 (zero-pad if we run out of input).
66 // 5) Goto (2) until all of input is consumed.
67 //
68 // note: we're glossing over how the sub-sample handling works with m_virtualSourceIndex, etc.
69 
70 namespace blink {
71 
SincResampler(double scaleFactor,unsigned kernelSize,unsigned numberOfKernelOffsets)72 SincResampler::SincResampler(double scaleFactor, unsigned kernelSize, unsigned numberOfKernelOffsets)
73     : m_scaleFactor(scaleFactor)
74     , m_kernelSize(kernelSize)
75     , m_numberOfKernelOffsets(numberOfKernelOffsets)
76     , m_kernelStorage(m_kernelSize * (m_numberOfKernelOffsets + 1))
77     , m_virtualSourceIndex(0)
78     , m_blockSize(512)
79     , m_inputBuffer(m_blockSize + m_kernelSize) // See input buffer layout above.
80     , m_source(0)
81     , m_sourceFramesAvailable(0)
82     , m_sourceProvider(0)
83     , m_isBufferPrimed(false)
84 {
85     initializeKernel();
86 }
87 
initializeKernel()88 void SincResampler::initializeKernel()
89 {
90     // Blackman window parameters.
91     double alpha = 0.16;
92     double a0 = 0.5 * (1.0 - alpha);
93     double a1 = 0.5;
94     double a2 = 0.5 * alpha;
95 
96     // sincScaleFactor is basically the normalized cutoff frequency of the low-pass filter.
97     double sincScaleFactor = m_scaleFactor > 1.0 ? 1.0 / m_scaleFactor : 1.0;
98 
99     // The sinc function is an idealized brick-wall filter, but since we're windowing it the
100     // transition from pass to stop does not happen right away. So we should adjust the
101     // lowpass filter cutoff slightly downward to avoid some aliasing at the very high-end.
102     // FIXME: this value is empirical and to be more exact should vary depending on m_kernelSize.
103     sincScaleFactor *= 0.9;
104 
105     int n = m_kernelSize;
106     int halfSize = n / 2;
107 
108     // Generates a set of windowed sinc() kernels.
109     // We generate a range of sub-sample offsets from 0.0 to 1.0.
110     for (unsigned offsetIndex = 0; offsetIndex <= m_numberOfKernelOffsets; ++offsetIndex) {
111         double subsampleOffset = static_cast<double>(offsetIndex) / m_numberOfKernelOffsets;
112 
113         for (int i = 0; i < n; ++i) {
114             // Compute the sinc() with offset.
115             double s = sincScaleFactor * piDouble * (i - halfSize - subsampleOffset);
116             double sinc = !s ? 1.0 : std::sin(s) / s;
117             sinc *= sincScaleFactor;
118 
119             // Compute Blackman window, matching the offset of the sinc().
120             double x = (i - subsampleOffset) / n;
121             double window = a0 - a1 * std::cos(twoPiDouble * x) + a2 * std::cos(twoPiDouble * 2.0 * x);
122 
123             // Window the sinc() function and store at the correct offset.
124             m_kernelStorage[i + offsetIndex * m_kernelSize] = sinc * window;
125         }
126     }
127 }
128 
consumeSource(float * buffer,unsigned numberOfSourceFrames)129 void SincResampler::consumeSource(float* buffer, unsigned numberOfSourceFrames)
130 {
131     ASSERT(m_sourceProvider);
132     if (!m_sourceProvider)
133         return;
134 
135     // Wrap the provided buffer by an AudioBus for use by the source provider.
136     RefPtr<AudioBus> bus = AudioBus::create(1, numberOfSourceFrames, false);
137 
138     // FIXME: Find a way to make the following const-correct:
139     bus->setChannelMemory(0, buffer, numberOfSourceFrames);
140 
141     m_sourceProvider->provideInput(bus.get(), numberOfSourceFrames);
142 }
143 
144 namespace {
145 
146 // BufferSourceProvider is an AudioSourceProvider wrapping an in-memory buffer.
147 
148 class BufferSourceProvider FINAL : public AudioSourceProvider {
149 public:
BufferSourceProvider(const float * source,size_t numberOfSourceFrames)150     BufferSourceProvider(const float* source, size_t numberOfSourceFrames)
151         : m_source(source)
152         , m_sourceFramesAvailable(numberOfSourceFrames)
153     {
154     }
155 
156     // Consumes samples from the in-memory buffer.
provideInput(AudioBus * bus,size_t framesToProcess)157     virtual void provideInput(AudioBus* bus, size_t framesToProcess) OVERRIDE
158     {
159         ASSERT(m_source && bus);
160         if (!m_source || !bus)
161             return;
162 
163         float* buffer = bus->channel(0)->mutableData();
164 
165         // Clamp to number of frames available and zero-pad.
166         size_t framesToCopy = std::min(m_sourceFramesAvailable, framesToProcess);
167         memcpy(buffer, m_source, sizeof(float) * framesToCopy);
168 
169         // Zero-pad if necessary.
170         if (framesToCopy < framesToProcess)
171             memset(buffer + framesToCopy, 0, sizeof(float) * (framesToProcess - framesToCopy));
172 
173         m_sourceFramesAvailable -= framesToCopy;
174         m_source += framesToCopy;
175     }
176 
177 private:
178     const float* m_source;
179     size_t m_sourceFramesAvailable;
180 };
181 
182 } // namespace
183 
process(const float * source,float * destination,unsigned numberOfSourceFrames)184 void SincResampler::process(const float* source, float* destination, unsigned numberOfSourceFrames)
185 {
186     // Resample an in-memory buffer using an AudioSourceProvider.
187     BufferSourceProvider sourceProvider(source, numberOfSourceFrames);
188 
189     unsigned numberOfDestinationFrames = static_cast<unsigned>(numberOfSourceFrames / m_scaleFactor);
190     unsigned remaining = numberOfDestinationFrames;
191 
192     while (remaining) {
193         unsigned framesThisTime = std::min(remaining, m_blockSize);
194         process(&sourceProvider, destination, framesThisTime);
195 
196         destination += framesThisTime;
197         remaining -= framesThisTime;
198     }
199 }
200 
process(AudioSourceProvider * sourceProvider,float * destination,size_t framesToProcess)201 void SincResampler::process(AudioSourceProvider* sourceProvider, float* destination, size_t framesToProcess)
202 {
203     bool isGood = sourceProvider && m_blockSize > m_kernelSize && m_inputBuffer.size() >= m_blockSize + m_kernelSize && !(m_kernelSize % 2);
204     ASSERT(isGood);
205     if (!isGood)
206         return;
207 
208     m_sourceProvider = sourceProvider;
209 
210     unsigned numberOfDestinationFrames = framesToProcess;
211 
212     // Setup various region pointers in the buffer (see diagram above).
213     float* r0 = m_inputBuffer.data() + m_kernelSize / 2;
214     float* r1 = m_inputBuffer.data();
215     float* r2 = r0;
216     float* r3 = r0 + m_blockSize - m_kernelSize / 2;
217     float* r4 = r0 + m_blockSize;
218     float* r5 = r0 + m_kernelSize / 2;
219 
220     // Step (1)
221     // Prime the input buffer at the start of the input stream.
222     if (!m_isBufferPrimed) {
223         consumeSource(r0, m_blockSize + m_kernelSize / 2);
224         m_isBufferPrimed = true;
225     }
226 
227     // Step (2)
228 
229     while (numberOfDestinationFrames) {
230         while (m_virtualSourceIndex < m_blockSize) {
231             // m_virtualSourceIndex lies in between two kernel offsets so figure out what they are.
232             int sourceIndexI = static_cast<int>(m_virtualSourceIndex);
233             double subsampleRemainder = m_virtualSourceIndex - sourceIndexI;
234 
235             double virtualOffsetIndex = subsampleRemainder * m_numberOfKernelOffsets;
236             int offsetIndex = static_cast<int>(virtualOffsetIndex);
237 
238             float* k1 = m_kernelStorage.data() + offsetIndex * m_kernelSize;
239             float* k2 = k1 + m_kernelSize;
240 
241             // Initialize input pointer based on quantized m_virtualSourceIndex.
242             float* inputP = r1 + sourceIndexI;
243 
244             // We'll compute "convolutions" for the two kernels which straddle m_virtualSourceIndex
245             float sum1 = 0;
246             float sum2 = 0;
247 
248             // Figure out how much to weight each kernel's "convolution".
249             double kernelInterpolationFactor = virtualOffsetIndex - offsetIndex;
250 
251             // Generate a single output sample.
252             int n = m_kernelSize;
253 
254 #define CONVOLVE_ONE_SAMPLE      \
255             input = *inputP++;   \
256             sum1 += input * *k1; \
257             sum2 += input * *k2; \
258             ++k1;                \
259             ++k2;
260 
261             {
262                 float input;
263 
264 #if CPU(X86) || CPU(X86_64)
265                 // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed seperately.
266                 while ((reinterpret_cast<uintptr_t>(inputP) & 0x0F) && n) {
267                     CONVOLVE_ONE_SAMPLE
268                     n--;
269                 }
270 
271                 // Now the inputP is aligned and start to apply SSE.
272                 float* endP = inputP + n - n % 4;
273                 __m128 mInput;
274                 __m128 mK1;
275                 __m128 mK2;
276                 __m128 mul1;
277                 __m128 mul2;
278 
279                 __m128 sums1 = _mm_setzero_ps();
280                 __m128 sums2 = _mm_setzero_ps();
281                 bool k1Aligned = !(reinterpret_cast<uintptr_t>(k1) & 0x0F);
282                 bool k2Aligned = !(reinterpret_cast<uintptr_t>(k2) & 0x0F);
283 
284 #define LOAD_DATA(l1, l2)                        \
285                 mInput = _mm_load_ps(inputP);    \
286                 mK1 = _mm_##l1##_ps(k1);         \
287                 mK2 = _mm_##l2##_ps(k2);
288 
289 #define CONVOLVE_4_SAMPLES                       \
290                 mul1 = _mm_mul_ps(mInput, mK1);  \
291                 mul2 = _mm_mul_ps(mInput, mK2);  \
292                 sums1 = _mm_add_ps(sums1, mul1); \
293                 sums2 = _mm_add_ps(sums2, mul2); \
294                 inputP += 4;                     \
295                 k1 += 4;                         \
296                 k2 += 4;
297 
298                 if (k1Aligned && k2Aligned) { // both aligned
299                     while (inputP < endP) {
300                         LOAD_DATA(load, load)
301                         CONVOLVE_4_SAMPLES
302                     }
303                 } else if (!k1Aligned && k2Aligned) { // only k2 aligned
304                     while (inputP < endP) {
305                         LOAD_DATA(loadu, load)
306                         CONVOLVE_4_SAMPLES
307                     }
308                 } else if (k1Aligned && !k2Aligned) { // only k1 aligned
309                     while (inputP < endP) {
310                         LOAD_DATA(load, loadu)
311                         CONVOLVE_4_SAMPLES
312                     }
313                 } else { // both non-aligned
314                     while (inputP < endP) {
315                         LOAD_DATA(loadu, loadu)
316                         CONVOLVE_4_SAMPLES
317                     }
318                 }
319 
320                 // Summarize the SSE results to sum1 and sum2.
321                 float* groupSumP = reinterpret_cast<float*>(&sums1);
322                 sum1 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
323                 groupSumP = reinterpret_cast<float*>(&sums2);
324                 sum2 += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
325 
326                 n %= 4;
327                 while (n) {
328                     CONVOLVE_ONE_SAMPLE
329                     n--;
330                 }
331 #else
332                 // FIXME: add ARM NEON optimizations for the following. The scalar code-path can probably also be optimized better.
333 
334                 // Optimize size 32 and size 64 kernels by unrolling the while loop.
335                 // A 20 - 30% speed improvement was measured in some cases by using this approach.
336 
337                 if (n == 32) {
338                     CONVOLVE_ONE_SAMPLE // 1
339                     CONVOLVE_ONE_SAMPLE // 2
340                     CONVOLVE_ONE_SAMPLE // 3
341                     CONVOLVE_ONE_SAMPLE // 4
342                     CONVOLVE_ONE_SAMPLE // 5
343                     CONVOLVE_ONE_SAMPLE // 6
344                     CONVOLVE_ONE_SAMPLE // 7
345                     CONVOLVE_ONE_SAMPLE // 8
346                     CONVOLVE_ONE_SAMPLE // 9
347                     CONVOLVE_ONE_SAMPLE // 10
348                     CONVOLVE_ONE_SAMPLE // 11
349                     CONVOLVE_ONE_SAMPLE // 12
350                     CONVOLVE_ONE_SAMPLE // 13
351                     CONVOLVE_ONE_SAMPLE // 14
352                     CONVOLVE_ONE_SAMPLE // 15
353                     CONVOLVE_ONE_SAMPLE // 16
354                     CONVOLVE_ONE_SAMPLE // 17
355                     CONVOLVE_ONE_SAMPLE // 18
356                     CONVOLVE_ONE_SAMPLE // 19
357                     CONVOLVE_ONE_SAMPLE // 20
358                     CONVOLVE_ONE_SAMPLE // 21
359                     CONVOLVE_ONE_SAMPLE // 22
360                     CONVOLVE_ONE_SAMPLE // 23
361                     CONVOLVE_ONE_SAMPLE // 24
362                     CONVOLVE_ONE_SAMPLE // 25
363                     CONVOLVE_ONE_SAMPLE // 26
364                     CONVOLVE_ONE_SAMPLE // 27
365                     CONVOLVE_ONE_SAMPLE // 28
366                     CONVOLVE_ONE_SAMPLE // 29
367                     CONVOLVE_ONE_SAMPLE // 30
368                     CONVOLVE_ONE_SAMPLE // 31
369                     CONVOLVE_ONE_SAMPLE // 32
370                 } else if (n == 64) {
371                     CONVOLVE_ONE_SAMPLE // 1
372                     CONVOLVE_ONE_SAMPLE // 2
373                     CONVOLVE_ONE_SAMPLE // 3
374                     CONVOLVE_ONE_SAMPLE // 4
375                     CONVOLVE_ONE_SAMPLE // 5
376                     CONVOLVE_ONE_SAMPLE // 6
377                     CONVOLVE_ONE_SAMPLE // 7
378                     CONVOLVE_ONE_SAMPLE // 8
379                     CONVOLVE_ONE_SAMPLE // 9
380                     CONVOLVE_ONE_SAMPLE // 10
381                     CONVOLVE_ONE_SAMPLE // 11
382                     CONVOLVE_ONE_SAMPLE // 12
383                     CONVOLVE_ONE_SAMPLE // 13
384                     CONVOLVE_ONE_SAMPLE // 14
385                     CONVOLVE_ONE_SAMPLE // 15
386                     CONVOLVE_ONE_SAMPLE // 16
387                     CONVOLVE_ONE_SAMPLE // 17
388                     CONVOLVE_ONE_SAMPLE // 18
389                     CONVOLVE_ONE_SAMPLE // 19
390                     CONVOLVE_ONE_SAMPLE // 20
391                     CONVOLVE_ONE_SAMPLE // 21
392                     CONVOLVE_ONE_SAMPLE // 22
393                     CONVOLVE_ONE_SAMPLE // 23
394                     CONVOLVE_ONE_SAMPLE // 24
395                     CONVOLVE_ONE_SAMPLE // 25
396                     CONVOLVE_ONE_SAMPLE // 26
397                     CONVOLVE_ONE_SAMPLE // 27
398                     CONVOLVE_ONE_SAMPLE // 28
399                     CONVOLVE_ONE_SAMPLE // 29
400                     CONVOLVE_ONE_SAMPLE // 30
401                     CONVOLVE_ONE_SAMPLE // 31
402                     CONVOLVE_ONE_SAMPLE // 32
403                     CONVOLVE_ONE_SAMPLE // 33
404                     CONVOLVE_ONE_SAMPLE // 34
405                     CONVOLVE_ONE_SAMPLE // 35
406                     CONVOLVE_ONE_SAMPLE // 36
407                     CONVOLVE_ONE_SAMPLE // 37
408                     CONVOLVE_ONE_SAMPLE // 38
409                     CONVOLVE_ONE_SAMPLE // 39
410                     CONVOLVE_ONE_SAMPLE // 40
411                     CONVOLVE_ONE_SAMPLE // 41
412                     CONVOLVE_ONE_SAMPLE // 42
413                     CONVOLVE_ONE_SAMPLE // 43
414                     CONVOLVE_ONE_SAMPLE // 44
415                     CONVOLVE_ONE_SAMPLE // 45
416                     CONVOLVE_ONE_SAMPLE // 46
417                     CONVOLVE_ONE_SAMPLE // 47
418                     CONVOLVE_ONE_SAMPLE // 48
419                     CONVOLVE_ONE_SAMPLE // 49
420                     CONVOLVE_ONE_SAMPLE // 50
421                     CONVOLVE_ONE_SAMPLE // 51
422                     CONVOLVE_ONE_SAMPLE // 52
423                     CONVOLVE_ONE_SAMPLE // 53
424                     CONVOLVE_ONE_SAMPLE // 54
425                     CONVOLVE_ONE_SAMPLE // 55
426                     CONVOLVE_ONE_SAMPLE // 56
427                     CONVOLVE_ONE_SAMPLE // 57
428                     CONVOLVE_ONE_SAMPLE // 58
429                     CONVOLVE_ONE_SAMPLE // 59
430                     CONVOLVE_ONE_SAMPLE // 60
431                     CONVOLVE_ONE_SAMPLE // 61
432                     CONVOLVE_ONE_SAMPLE // 62
433                     CONVOLVE_ONE_SAMPLE // 63
434                     CONVOLVE_ONE_SAMPLE // 64
435                 } else {
436                     while (n--) {
437                         // Non-optimized using actual while loop.
438                         CONVOLVE_ONE_SAMPLE
439                     }
440                 }
441 #endif
442             }
443 
444             // Linearly interpolate the two "convolutions".
445             double result = (1.0 - kernelInterpolationFactor) * sum1 + kernelInterpolationFactor * sum2;
446 
447             *destination++ = result;
448 
449             // Advance the virtual index.
450             m_virtualSourceIndex += m_scaleFactor;
451 
452             --numberOfDestinationFrames;
453             if (!numberOfDestinationFrames)
454                 return;
455         }
456 
457         // Wrap back around to the start.
458         m_virtualSourceIndex -= m_blockSize;
459 
460         // Step (3) Copy r3 to r1 and r4 to r2.
461         // This wraps the last input frames back to the start of the buffer.
462         memcpy(r1, r3, sizeof(float) * (m_kernelSize / 2));
463         memcpy(r2, r4, sizeof(float) * (m_kernelSize / 2));
464 
465         // Step (4)
466         // Refresh the buffer with more input.
467         consumeSource(r5, m_blockSize);
468     }
469 }
470 
471 } // namespace blink
472 
473 #endif // ENABLE(WEB_AUDIO)
474