• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/audio_processing/audio_buffer.h"
12 
13 #include "webrtc/common_audio/include/audio_util.h"
14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
16 
17 namespace webrtc {
18 namespace {
19 
// Frame lengths, in samples per channel, for the sample rate named in each
// constant. (Each value works out to 10 ms of audio at that rate.)
enum {
  kSamplesPer8kHzChannel = 80,    // 8 kHz.
  kSamplesPer16kHzChannel = 160,  // 16 kHz.
  kSamplesPer32kHzChannel = 320   // 32 kHz.
};
25 
HasKeyboardChannel(AudioProcessing::ChannelLayout layout)26 bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
27   switch (layout) {
28     case AudioProcessing::kMono:
29     case AudioProcessing::kStereo:
30       return false;
31     case AudioProcessing::kMonoAndKeyboard:
32     case AudioProcessing::kStereoAndKeyboard:
33       return true;
34   }
35   assert(false);
36   return false;
37 }
38 
KeyboardChannelIndex(AudioProcessing::ChannelLayout layout)39 int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
40   switch (layout) {
41     case AudioProcessing::kMono:
42     case AudioProcessing::kStereo:
43       assert(false);
44       return -1;
45     case AudioProcessing::kMonoAndKeyboard:
46       return 1;
47     case AudioProcessing::kStereoAndKeyboard:
48       return 2;
49   }
50   assert(false);
51   return -1;
52 }
53 
54 
// Downmixes two float channels to one by averaging them sample by sample.
// Writes |samples_per_channel| values to |out|; nothing is written when the
// count is zero or negative.
void StereoToMono(const float* left, const float* right, float* out,
                  int samples_per_channel) {
  for (int remaining = samples_per_channel; remaining > 0; --remaining) {
    const float sum = *left++ + *right++;
    *out++ = sum / 2;
  }
}
61 
// Downmixes two int16_t channels to one. Each output sample is the sum of the
// two inputs (computed in int, so it cannot overflow) shifted right by one.
// Writes |samples_per_channel| values to |out|; nothing is written when the
// count is zero or negative.
void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
                  int samples_per_channel) {
  for (int remaining = samples_per_channel; remaining > 0; --remaining) {
    const int sum = *left++ + *right++;
    *out++ = sum >> 1;
  }
}
68 
69 }  // namespace
70 
71 // One int16_t and one float ChannelBuffer that are kept in sync. The sync is
72 // broken when someone requests write access to either ChannelBuffer, and
73 // reestablished when someone requests the outdated ChannelBuffer. It is
74 // therefore safe to use the return value of ibuf() and fbuf() until the next
75 // call to the other method.
76 class IFChannelBuffer {
77  public:
IFChannelBuffer(int samples_per_channel,int num_channels)78   IFChannelBuffer(int samples_per_channel, int num_channels)
79       : ivalid_(true),
80         ibuf_(samples_per_channel, num_channels),
81         fvalid_(true),
82         fbuf_(samples_per_channel, num_channels) {}
83 
ibuf()84   ChannelBuffer<int16_t>* ibuf() {
85     RefreshI();
86     fvalid_ = false;
87     return &ibuf_;
88   }
89 
fbuf()90   ChannelBuffer<float>* fbuf() {
91     RefreshF();
92     ivalid_ = false;
93     return &fbuf_;
94   }
95 
96  private:
RefreshF()97   void RefreshF() {
98     if (!fvalid_) {
99       assert(ivalid_);
100       const int16_t* const int_data = ibuf_.data();
101       float* const float_data = fbuf_.data();
102       const int length = fbuf_.length();
103       for (int i = 0; i < length; ++i)
104         float_data[i] = int_data[i];
105       fvalid_ = true;
106     }
107   }
108 
RefreshI()109   void RefreshI() {
110     if (!ivalid_) {
111       assert(fvalid_);
112       const float* const float_data = fbuf_.data();
113       int16_t* const int_data = ibuf_.data();
114       const int length = ibuf_.length();
115       for (int i = 0; i < length; ++i)
116         int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
117                                      float_data[i],
118                                      std::numeric_limits<int16_t>::min());
119       ivalid_ = true;
120     }
121   }
122 
123   bool ivalid_;
124   ChannelBuffer<int16_t> ibuf_;
125   bool fvalid_;
126   ChannelBuffer<float> fbuf_;
127 };
128 
129 class SplitChannelBuffer {
130  public:
SplitChannelBuffer(int samples_per_split_channel,int num_channels)131   SplitChannelBuffer(int samples_per_split_channel, int num_channels)
132       : low_(samples_per_split_channel, num_channels),
133         high_(samples_per_split_channel, num_channels) {
134   }
~SplitChannelBuffer()135   ~SplitChannelBuffer() {}
136 
low_channel(int i)137   int16_t* low_channel(int i) { return low_.ibuf()->channel(i); }
high_channel(int i)138   int16_t* high_channel(int i) { return high_.ibuf()->channel(i); }
low_channel_f(int i)139   float* low_channel_f(int i) { return low_.fbuf()->channel(i); }
high_channel_f(int i)140   float* high_channel_f(int i) { return high_.fbuf()->channel(i); }
141 
142  private:
143   IFChannelBuffer low_;
144   IFChannelBuffer high_;
145 };
146 
AudioBuffer(int input_samples_per_channel,int num_input_channels,int process_samples_per_channel,int num_process_channels,int output_samples_per_channel)147 AudioBuffer::AudioBuffer(int input_samples_per_channel,
148                          int num_input_channels,
149                          int process_samples_per_channel,
150                          int num_process_channels,
151                          int output_samples_per_channel)
152   : input_samples_per_channel_(input_samples_per_channel),
153     num_input_channels_(num_input_channels),
154     proc_samples_per_channel_(process_samples_per_channel),
155     num_proc_channels_(num_process_channels),
156     output_samples_per_channel_(output_samples_per_channel),
157     samples_per_split_channel_(proc_samples_per_channel_),
158     num_mixed_channels_(0),
159     num_mixed_low_pass_channels_(0),
160     reference_copied_(false),
161     activity_(AudioFrame::kVadUnknown),
162     keyboard_data_(NULL),
163     channels_(new IFChannelBuffer(proc_samples_per_channel_,
164                                   num_proc_channels_)) {
165   assert(input_samples_per_channel_ > 0);
166   assert(proc_samples_per_channel_ > 0);
167   assert(output_samples_per_channel_ > 0);
168   assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
169   assert(num_proc_channels_ <= num_input_channels);
170 
171   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
172     input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
173                                                  num_proc_channels_));
174   }
175 
176   if (input_samples_per_channel_ != proc_samples_per_channel_ ||
177       output_samples_per_channel_ != proc_samples_per_channel_) {
178     // Create an intermediate buffer for resampling.
179     process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
180                                                    num_proc_channels_));
181   }
182 
183   if (input_samples_per_channel_ != proc_samples_per_channel_) {
184     input_resamplers_.reserve(num_proc_channels_);
185     for (int i = 0; i < num_proc_channels_; ++i) {
186       input_resamplers_.push_back(
187           new PushSincResampler(input_samples_per_channel_,
188                                 proc_samples_per_channel_));
189     }
190   }
191 
192   if (output_samples_per_channel_ != proc_samples_per_channel_) {
193     output_resamplers_.reserve(num_proc_channels_);
194     for (int i = 0; i < num_proc_channels_; ++i) {
195       output_resamplers_.push_back(
196           new PushSincResampler(proc_samples_per_channel_,
197                                 output_samples_per_channel_));
198     }
199   }
200 
201   if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
202     samples_per_split_channel_ = kSamplesPer16kHzChannel;
203     split_channels_.reset(new SplitChannelBuffer(samples_per_split_channel_,
204                                                  num_proc_channels_));
205     filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
206   }
207 }
208 
~AudioBuffer()209 AudioBuffer::~AudioBuffer() {}
210 
CopyFrom(const float * const * data,int samples_per_channel,AudioProcessing::ChannelLayout layout)211 void AudioBuffer::CopyFrom(const float* const* data,
212                            int samples_per_channel,
213                            AudioProcessing::ChannelLayout layout) {
214   assert(samples_per_channel == input_samples_per_channel_);
215   assert(ChannelsFromLayout(layout) == num_input_channels_);
216   InitForNewData();
217 
218   if (HasKeyboardChannel(layout)) {
219     keyboard_data_ = data[KeyboardChannelIndex(layout)];
220   }
221 
222   // Downmix.
223   const float* const* data_ptr = data;
224   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
225     StereoToMono(data[0],
226                  data[1],
227                  input_buffer_->channel(0),
228                  input_samples_per_channel_);
229     data_ptr = input_buffer_->channels();
230   }
231 
232   // Resample.
233   if (input_samples_per_channel_ != proc_samples_per_channel_) {
234     for (int i = 0; i < num_proc_channels_; ++i) {
235       input_resamplers_[i]->Resample(data_ptr[i],
236                                      input_samples_per_channel_,
237                                      process_buffer_->channel(i),
238                                      proc_samples_per_channel_);
239     }
240     data_ptr = process_buffer_->channels();
241   }
242 
243   // Convert to int16.
244   for (int i = 0; i < num_proc_channels_; ++i) {
245     ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
246                          channels_->ibuf()->channel(i));
247   }
248 }
249 
CopyTo(int samples_per_channel,AudioProcessing::ChannelLayout layout,float * const * data)250 void AudioBuffer::CopyTo(int samples_per_channel,
251                          AudioProcessing::ChannelLayout layout,
252                          float* const* data) {
253   assert(samples_per_channel == output_samples_per_channel_);
254   assert(ChannelsFromLayout(layout) == num_proc_channels_);
255 
256   // Convert to float.
257   float* const* data_ptr = data;
258   if (output_samples_per_channel_ != proc_samples_per_channel_) {
259     // Convert to an intermediate buffer for subsequent resampling.
260     data_ptr = process_buffer_->channels();
261   }
262   for (int i = 0; i < num_proc_channels_; ++i) {
263     ScaleToFloat(channels_->ibuf()->channel(i),
264                  proc_samples_per_channel_,
265                  data_ptr[i]);
266   }
267 
268   // Resample.
269   if (output_samples_per_channel_ != proc_samples_per_channel_) {
270     for (int i = 0; i < num_proc_channels_; ++i) {
271       output_resamplers_[i]->Resample(data_ptr[i],
272                                       proc_samples_per_channel_,
273                                       data[i],
274                                       output_samples_per_channel_);
275     }
276   }
277 }
278 
InitForNewData()279 void AudioBuffer::InitForNewData() {
280   keyboard_data_ = NULL;
281   num_mixed_channels_ = 0;
282   num_mixed_low_pass_channels_ = 0;
283   reference_copied_ = false;
284   activity_ = AudioFrame::kVadUnknown;
285 }
286 
data(int channel) const287 const int16_t* AudioBuffer::data(int channel) const {
288   assert(channel >= 0 && channel < num_proc_channels_);
289   return channels_->ibuf()->channel(channel);
290 }
291 
data(int channel)292 int16_t* AudioBuffer::data(int channel) {
293   const AudioBuffer* t = this;
294   return const_cast<int16_t*>(t->data(channel));
295 }
296 
data_f(int channel)297 float* AudioBuffer::data_f(int channel) {
298   assert(channel >= 0 && channel < num_proc_channels_);
299   return channels_->fbuf()->channel(channel);
300 }
301 
low_pass_split_data(int channel) const302 const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
303   assert(channel >= 0 && channel < num_proc_channels_);
304   return split_channels_.get() ? split_channels_->low_channel(channel)
305                                : data(channel);
306 }
307 
low_pass_split_data(int channel)308 int16_t* AudioBuffer::low_pass_split_data(int channel) {
309   const AudioBuffer* t = this;
310   return const_cast<int16_t*>(t->low_pass_split_data(channel));
311 }
312 
low_pass_split_data_f(int channel)313 float* AudioBuffer::low_pass_split_data_f(int channel) {
314   assert(channel >= 0 && channel < num_proc_channels_);
315   return split_channels_.get() ? split_channels_->low_channel_f(channel)
316                                : data_f(channel);
317 }
318 
high_pass_split_data(int channel) const319 const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
320   assert(channel >= 0 && channel < num_proc_channels_);
321   return split_channels_.get() ? split_channels_->high_channel(channel) : NULL;
322 }
323 
high_pass_split_data(int channel)324 int16_t* AudioBuffer::high_pass_split_data(int channel) {
325   const AudioBuffer* t = this;
326   return const_cast<int16_t*>(t->high_pass_split_data(channel));
327 }
328 
high_pass_split_data_f(int channel)329 float* AudioBuffer::high_pass_split_data_f(int channel) {
330   assert(channel >= 0 && channel < num_proc_channels_);
331   return split_channels_.get() ? split_channels_->high_channel_f(channel)
332                                : NULL;
333 }
334 
mixed_data(int channel) const335 const int16_t* AudioBuffer::mixed_data(int channel) const {
336   assert(channel >= 0 && channel < num_mixed_channels_);
337 
338   return mixed_channels_->channel(channel);
339 }
340 
mixed_low_pass_data(int channel) const341 const int16_t* AudioBuffer::mixed_low_pass_data(int channel) const {
342   assert(channel >= 0 && channel < num_mixed_low_pass_channels_);
343 
344   return mixed_low_pass_channels_->channel(channel);
345 }
346 
low_pass_reference(int channel) const347 const int16_t* AudioBuffer::low_pass_reference(int channel) const {
348   assert(channel >= 0 && channel < num_proc_channels_);
349   if (!reference_copied_) {
350     return NULL;
351   }
352 
353   return low_pass_reference_channels_->channel(channel);
354 }
355 
keyboard_data() const356 const float* AudioBuffer::keyboard_data() const {
357   return keyboard_data_;
358 }
359 
filter_states(int channel)360 SplitFilterStates* AudioBuffer::filter_states(int channel) {
361   assert(channel >= 0 && channel < num_proc_channels_);
362   return &filter_states_[channel];
363 }
364 
set_activity(AudioFrame::VADActivity activity)365 void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
366   activity_ = activity;
367 }
368 
activity() const369 AudioFrame::VADActivity AudioBuffer::activity() const {
370   return activity_;
371 }
372 
num_channels() const373 int AudioBuffer::num_channels() const {
374   return num_proc_channels_;
375 }
376 
samples_per_channel() const377 int AudioBuffer::samples_per_channel() const {
378   return proc_samples_per_channel_;
379 }
380 
samples_per_split_channel() const381 int AudioBuffer::samples_per_split_channel() const {
382   return samples_per_split_channel_;
383 }
384 
samples_per_keyboard_channel() const385 int AudioBuffer::samples_per_keyboard_channel() const {
386   // We don't resample the keyboard channel.
387   return input_samples_per_channel_;
388 }
389 
390 // TODO(andrew): Do deinterleaving and mixing in one step?
DeinterleaveFrom(AudioFrame * frame)391 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
392   assert(proc_samples_per_channel_ == input_samples_per_channel_);
393   assert(num_proc_channels_ == num_input_channels_);
394   assert(frame->num_channels_ == num_proc_channels_);
395   assert(frame->samples_per_channel_ ==  proc_samples_per_channel_);
396   InitForNewData();
397   activity_ = frame->vad_activity_;
398 
399   int16_t* interleaved = frame->data_;
400   for (int i = 0; i < num_proc_channels_; i++) {
401     int16_t* deinterleaved = channels_->ibuf()->channel(i);
402     int interleaved_idx = i;
403     for (int j = 0; j < proc_samples_per_channel_; j++) {
404       deinterleaved[j] = interleaved[interleaved_idx];
405       interleaved_idx += num_proc_channels_;
406     }
407   }
408 }
409 
InterleaveTo(AudioFrame * frame,bool data_changed) const410 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
411   assert(proc_samples_per_channel_ == output_samples_per_channel_);
412   assert(num_proc_channels_ == num_input_channels_);
413   assert(frame->num_channels_ == num_proc_channels_);
414   assert(frame->samples_per_channel_ == proc_samples_per_channel_);
415   frame->vad_activity_ = activity_;
416 
417   if (!data_changed) {
418     return;
419   }
420 
421   int16_t* interleaved = frame->data_;
422   for (int i = 0; i < num_proc_channels_; i++) {
423     int16_t* deinterleaved = channels_->ibuf()->channel(i);
424     int interleaved_idx = i;
425     for (int j = 0; j < proc_samples_per_channel_; j++) {
426       interleaved[interleaved_idx] = deinterleaved[j];
427       interleaved_idx += num_proc_channels_;
428     }
429   }
430 }
431 
CopyAndMix(int num_mixed_channels)432 void AudioBuffer::CopyAndMix(int num_mixed_channels) {
433   // We currently only support the stereo to mono case.
434   assert(num_proc_channels_ == 2);
435   assert(num_mixed_channels == 1);
436   if (!mixed_channels_.get()) {
437     mixed_channels_.reset(
438         new ChannelBuffer<int16_t>(proc_samples_per_channel_,
439                                    num_mixed_channels));
440   }
441 
442   StereoToMono(channels_->ibuf()->channel(0),
443                channels_->ibuf()->channel(1),
444                mixed_channels_->channel(0),
445                proc_samples_per_channel_);
446 
447   num_mixed_channels_ = num_mixed_channels;
448 }
449 
CopyAndMixLowPass(int num_mixed_channels)450 void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
451   // We currently only support the stereo to mono case.
452   assert(num_proc_channels_ == 2);
453   assert(num_mixed_channels == 1);
454   if (!mixed_low_pass_channels_.get()) {
455     mixed_low_pass_channels_.reset(
456         new ChannelBuffer<int16_t>(samples_per_split_channel_,
457                                    num_mixed_channels));
458   }
459 
460   StereoToMono(low_pass_split_data(0),
461                low_pass_split_data(1),
462                mixed_low_pass_channels_->channel(0),
463                samples_per_split_channel_);
464 
465   num_mixed_low_pass_channels_ = num_mixed_channels;
466 }
467 
CopyLowPassToReference()468 void AudioBuffer::CopyLowPassToReference() {
469   reference_copied_ = true;
470   if (!low_pass_reference_channels_.get()) {
471     low_pass_reference_channels_.reset(
472         new ChannelBuffer<int16_t>(samples_per_split_channel_,
473                                    num_proc_channels_));
474   }
475   for (int i = 0; i < num_proc_channels_; i++) {
476     low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
477   }
478 }
479 
480 }  // namespace webrtc
481