1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/audio_processing/audio_buffer.h"
12
13 #include "webrtc/common_audio/include/audio_util.h"
14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
16
17 namespace webrtc {
18 namespace {
19
// Per-channel sample counts associated with the 8, 16 and 32 kHz rates named
// in each constant. In this file the 32 kHz value selects the split-band path
// and the 16 kHz value is the resulting per-band length.
enum {
  kSamplesPer8kHzChannel = 80,
  kSamplesPer16kHzChannel = 160,
  kSamplesPer32kHzChannel = 320
};
25
HasKeyboardChannel(AudioProcessing::ChannelLayout layout)26 bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
27 switch (layout) {
28 case AudioProcessing::kMono:
29 case AudioProcessing::kStereo:
30 return false;
31 case AudioProcessing::kMonoAndKeyboard:
32 case AudioProcessing::kStereoAndKeyboard:
33 return true;
34 }
35 assert(false);
36 return false;
37 }
38
KeyboardChannelIndex(AudioProcessing::ChannelLayout layout)39 int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
40 switch (layout) {
41 case AudioProcessing::kMono:
42 case AudioProcessing::kStereo:
43 assert(false);
44 return -1;
45 case AudioProcessing::kMonoAndKeyboard:
46 return 1;
47 case AudioProcessing::kStereoAndKeyboard:
48 return 2;
49 }
50 assert(false);
51 return -1;
52 }
53
54
// Writes the per-sample average of |left| and |right| into |out|. All three
// arrays are indexed from 0 to |samples_per_channel| - 1; nothing is written
// when |samples_per_channel| is not positive.
void StereoToMono(const float* left, const float* right, float* out,
                  int samples_per_channel) {
  for (int n = 0; n < samples_per_channel; ++n) {
    const float sum = left[n] + right[n];
    out[n] = sum / 2;
  }
}
61
// Integer variant: adds each pair of samples (the addition is carried out in
// int, so it cannot overflow for int16_t inputs) and halves the sum with a
// right shift before storing it in |out|.
void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
                  int samples_per_channel) {
  for (int n = 0; n < samples_per_channel; ++n) {
    const int sum = left[n] + right[n];
    out[n] = static_cast<int16_t>(sum >> 1);
  }
}
68
69 } // namespace
70
71 // One int16_t and one float ChannelBuffer that are kept in sync. The sync is
72 // broken when someone requests write access to either ChannelBuffer, and
73 // reestablished when someone requests the outdated ChannelBuffer. It is
74 // therefore safe to use the return value of ibuf() and fbuf() until the next
75 // call to the other method.
76 class IFChannelBuffer {
77 public:
IFChannelBuffer(int samples_per_channel,int num_channels)78 IFChannelBuffer(int samples_per_channel, int num_channels)
79 : ivalid_(true),
80 ibuf_(samples_per_channel, num_channels),
81 fvalid_(true),
82 fbuf_(samples_per_channel, num_channels) {}
83
ibuf()84 ChannelBuffer<int16_t>* ibuf() {
85 RefreshI();
86 fvalid_ = false;
87 return &ibuf_;
88 }
89
fbuf()90 ChannelBuffer<float>* fbuf() {
91 RefreshF();
92 ivalid_ = false;
93 return &fbuf_;
94 }
95
96 private:
RefreshF()97 void RefreshF() {
98 if (!fvalid_) {
99 assert(ivalid_);
100 const int16_t* const int_data = ibuf_.data();
101 float* const float_data = fbuf_.data();
102 const int length = fbuf_.length();
103 for (int i = 0; i < length; ++i)
104 float_data[i] = int_data[i];
105 fvalid_ = true;
106 }
107 }
108
RefreshI()109 void RefreshI() {
110 if (!ivalid_) {
111 assert(fvalid_);
112 const float* const float_data = fbuf_.data();
113 int16_t* const int_data = ibuf_.data();
114 const int length = ibuf_.length();
115 for (int i = 0; i < length; ++i)
116 int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
117 float_data[i],
118 std::numeric_limits<int16_t>::min());
119 ivalid_ = true;
120 }
121 }
122
123 bool ivalid_;
124 ChannelBuffer<int16_t> ibuf_;
125 bool fvalid_;
126 ChannelBuffer<float> fbuf_;
127 };
128
129 class SplitChannelBuffer {
130 public:
SplitChannelBuffer(int samples_per_split_channel,int num_channels)131 SplitChannelBuffer(int samples_per_split_channel, int num_channels)
132 : low_(samples_per_split_channel, num_channels),
133 high_(samples_per_split_channel, num_channels) {
134 }
~SplitChannelBuffer()135 ~SplitChannelBuffer() {}
136
low_channel(int i)137 int16_t* low_channel(int i) { return low_.ibuf()->channel(i); }
high_channel(int i)138 int16_t* high_channel(int i) { return high_.ibuf()->channel(i); }
low_channel_f(int i)139 float* low_channel_f(int i) { return low_.fbuf()->channel(i); }
high_channel_f(int i)140 float* high_channel_f(int i) { return high_.fbuf()->channel(i); }
141
142 private:
143 IFChannelBuffer low_;
144 IFChannelBuffer high_;
145 };
146
AudioBuffer(int input_samples_per_channel,int num_input_channels,int process_samples_per_channel,int num_process_channels,int output_samples_per_channel)147 AudioBuffer::AudioBuffer(int input_samples_per_channel,
148 int num_input_channels,
149 int process_samples_per_channel,
150 int num_process_channels,
151 int output_samples_per_channel)
152 : input_samples_per_channel_(input_samples_per_channel),
153 num_input_channels_(num_input_channels),
154 proc_samples_per_channel_(process_samples_per_channel),
155 num_proc_channels_(num_process_channels),
156 output_samples_per_channel_(output_samples_per_channel),
157 samples_per_split_channel_(proc_samples_per_channel_),
158 num_mixed_channels_(0),
159 num_mixed_low_pass_channels_(0),
160 reference_copied_(false),
161 activity_(AudioFrame::kVadUnknown),
162 keyboard_data_(NULL),
163 channels_(new IFChannelBuffer(proc_samples_per_channel_,
164 num_proc_channels_)) {
165 assert(input_samples_per_channel_ > 0);
166 assert(proc_samples_per_channel_ > 0);
167 assert(output_samples_per_channel_ > 0);
168 assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
169 assert(num_proc_channels_ <= num_input_channels);
170
171 if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
172 input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
173 num_proc_channels_));
174 }
175
176 if (input_samples_per_channel_ != proc_samples_per_channel_ ||
177 output_samples_per_channel_ != proc_samples_per_channel_) {
178 // Create an intermediate buffer for resampling.
179 process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
180 num_proc_channels_));
181 }
182
183 if (input_samples_per_channel_ != proc_samples_per_channel_) {
184 input_resamplers_.reserve(num_proc_channels_);
185 for (int i = 0; i < num_proc_channels_; ++i) {
186 input_resamplers_.push_back(
187 new PushSincResampler(input_samples_per_channel_,
188 proc_samples_per_channel_));
189 }
190 }
191
192 if (output_samples_per_channel_ != proc_samples_per_channel_) {
193 output_resamplers_.reserve(num_proc_channels_);
194 for (int i = 0; i < num_proc_channels_; ++i) {
195 output_resamplers_.push_back(
196 new PushSincResampler(proc_samples_per_channel_,
197 output_samples_per_channel_));
198 }
199 }
200
201 if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
202 samples_per_split_channel_ = kSamplesPer16kHzChannel;
203 split_channels_.reset(new SplitChannelBuffer(samples_per_split_channel_,
204 num_proc_channels_));
205 filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
206 }
207 }
208
// Nothing is released explicitly here; cleanup is left to the members' own
// destructors.
AudioBuffer::~AudioBuffer() {}
210
CopyFrom(const float * const * data,int samples_per_channel,AudioProcessing::ChannelLayout layout)211 void AudioBuffer::CopyFrom(const float* const* data,
212 int samples_per_channel,
213 AudioProcessing::ChannelLayout layout) {
214 assert(samples_per_channel == input_samples_per_channel_);
215 assert(ChannelsFromLayout(layout) == num_input_channels_);
216 InitForNewData();
217
218 if (HasKeyboardChannel(layout)) {
219 keyboard_data_ = data[KeyboardChannelIndex(layout)];
220 }
221
222 // Downmix.
223 const float* const* data_ptr = data;
224 if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
225 StereoToMono(data[0],
226 data[1],
227 input_buffer_->channel(0),
228 input_samples_per_channel_);
229 data_ptr = input_buffer_->channels();
230 }
231
232 // Resample.
233 if (input_samples_per_channel_ != proc_samples_per_channel_) {
234 for (int i = 0; i < num_proc_channels_; ++i) {
235 input_resamplers_[i]->Resample(data_ptr[i],
236 input_samples_per_channel_,
237 process_buffer_->channel(i),
238 proc_samples_per_channel_);
239 }
240 data_ptr = process_buffer_->channels();
241 }
242
243 // Convert to int16.
244 for (int i = 0; i < num_proc_channels_; ++i) {
245 ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
246 channels_->ibuf()->channel(i));
247 }
248 }
249
CopyTo(int samples_per_channel,AudioProcessing::ChannelLayout layout,float * const * data)250 void AudioBuffer::CopyTo(int samples_per_channel,
251 AudioProcessing::ChannelLayout layout,
252 float* const* data) {
253 assert(samples_per_channel == output_samples_per_channel_);
254 assert(ChannelsFromLayout(layout) == num_proc_channels_);
255
256 // Convert to float.
257 float* const* data_ptr = data;
258 if (output_samples_per_channel_ != proc_samples_per_channel_) {
259 // Convert to an intermediate buffer for subsequent resampling.
260 data_ptr = process_buffer_->channels();
261 }
262 for (int i = 0; i < num_proc_channels_; ++i) {
263 ScaleToFloat(channels_->ibuf()->channel(i),
264 proc_samples_per_channel_,
265 data_ptr[i]);
266 }
267
268 // Resample.
269 if (output_samples_per_channel_ != proc_samples_per_channel_) {
270 for (int i = 0; i < num_proc_channels_; ++i) {
271 output_resamplers_[i]->Resample(data_ptr[i],
272 proc_samples_per_channel_,
273 data[i],
274 output_samples_per_channel_);
275 }
276 }
277 }
278
InitForNewData()279 void AudioBuffer::InitForNewData() {
280 keyboard_data_ = NULL;
281 num_mixed_channels_ = 0;
282 num_mixed_low_pass_channels_ = 0;
283 reference_copied_ = false;
284 activity_ = AudioFrame::kVadUnknown;
285 }
286
data(int channel) const287 const int16_t* AudioBuffer::data(int channel) const {
288 assert(channel >= 0 && channel < num_proc_channels_);
289 return channels_->ibuf()->channel(channel);
290 }
291
data(int channel)292 int16_t* AudioBuffer::data(int channel) {
293 const AudioBuffer* t = this;
294 return const_cast<int16_t*>(t->data(channel));
295 }
296
data_f(int channel)297 float* AudioBuffer::data_f(int channel) {
298 assert(channel >= 0 && channel < num_proc_channels_);
299 return channels_->fbuf()->channel(channel);
300 }
301
low_pass_split_data(int channel) const302 const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
303 assert(channel >= 0 && channel < num_proc_channels_);
304 return split_channels_.get() ? split_channels_->low_channel(channel)
305 : data(channel);
306 }
307
low_pass_split_data(int channel)308 int16_t* AudioBuffer::low_pass_split_data(int channel) {
309 const AudioBuffer* t = this;
310 return const_cast<int16_t*>(t->low_pass_split_data(channel));
311 }
312
low_pass_split_data_f(int channel)313 float* AudioBuffer::low_pass_split_data_f(int channel) {
314 assert(channel >= 0 && channel < num_proc_channels_);
315 return split_channels_.get() ? split_channels_->low_channel_f(channel)
316 : data_f(channel);
317 }
318
high_pass_split_data(int channel) const319 const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
320 assert(channel >= 0 && channel < num_proc_channels_);
321 return split_channels_.get() ? split_channels_->high_channel(channel) : NULL;
322 }
323
high_pass_split_data(int channel)324 int16_t* AudioBuffer::high_pass_split_data(int channel) {
325 const AudioBuffer* t = this;
326 return const_cast<int16_t*>(t->high_pass_split_data(channel));
327 }
328
high_pass_split_data_f(int channel)329 float* AudioBuffer::high_pass_split_data_f(int channel) {
330 assert(channel >= 0 && channel < num_proc_channels_);
331 return split_channels_.get() ? split_channels_->high_channel_f(channel)
332 : NULL;
333 }
334
mixed_data(int channel) const335 const int16_t* AudioBuffer::mixed_data(int channel) const {
336 assert(channel >= 0 && channel < num_mixed_channels_);
337
338 return mixed_channels_->channel(channel);
339 }
340
mixed_low_pass_data(int channel) const341 const int16_t* AudioBuffer::mixed_low_pass_data(int channel) const {
342 assert(channel >= 0 && channel < num_mixed_low_pass_channels_);
343
344 return mixed_low_pass_channels_->channel(channel);
345 }
346
low_pass_reference(int channel) const347 const int16_t* AudioBuffer::low_pass_reference(int channel) const {
348 assert(channel >= 0 && channel < num_proc_channels_);
349 if (!reference_copied_) {
350 return NULL;
351 }
352
353 return low_pass_reference_channels_->channel(channel);
354 }
355
// Returns the keyboard channel pointer recorded by CopyFrom(), or NULL if
// none has been recorded since the last InitForNewData(). The pointer refers
// to the array that was passed to CopyFrom(); no copy is kept here.
const float* AudioBuffer::keyboard_data() const {
  return keyboard_data_;
}
359
filter_states(int channel)360 SplitFilterStates* AudioBuffer::filter_states(int channel) {
361 assert(channel >= 0 && channel < num_proc_channels_);
362 return &filter_states_[channel];
363 }
364
// Stores |activity| as the voice activity state of the current data. The value
// is reset to kVadUnknown by InitForNewData().
void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
  activity_ = activity;
}
368
// Returns the stored voice activity state: the value from set_activity() or
// from the frame passed to DeinterleaveFrom(), otherwise kVadUnknown.
AudioFrame::VADActivity AudioBuffer::activity() const {
  return activity_;
}
372
// Returns the number of processing channels (not the number of input
// channels, which can be larger when downmixing).
int AudioBuffer::num_channels() const {
  return num_proc_channels_;
}
376
// Returns the number of samples per channel at the processing rate.
int AudioBuffer::samples_per_channel() const {
  return proc_samples_per_channel_;
}
380
// Returns the number of samples per split channel. This equals the processing
// length unless the constructor enabled split buffers, in which case it is
// kSamplesPer16kHzChannel.
int AudioBuffer::samples_per_split_channel() const {
  return samples_per_split_channel_;
}
384
// Returns the number of samples in the keyboard channel.
int AudioBuffer::samples_per_keyboard_channel() const {
  // The keyboard channel is never resampled (CopyFrom() only records a pointer
  // to the caller's data), so it keeps the input length.
  return input_samples_per_channel_;
}
389
390 // TODO(andrew): Do deinterleaving and mixing in one step?
DeinterleaveFrom(AudioFrame * frame)391 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
392 assert(proc_samples_per_channel_ == input_samples_per_channel_);
393 assert(num_proc_channels_ == num_input_channels_);
394 assert(frame->num_channels_ == num_proc_channels_);
395 assert(frame->samples_per_channel_ == proc_samples_per_channel_);
396 InitForNewData();
397 activity_ = frame->vad_activity_;
398
399 int16_t* interleaved = frame->data_;
400 for (int i = 0; i < num_proc_channels_; i++) {
401 int16_t* deinterleaved = channels_->ibuf()->channel(i);
402 int interleaved_idx = i;
403 for (int j = 0; j < proc_samples_per_channel_; j++) {
404 deinterleaved[j] = interleaved[interleaved_idx];
405 interleaved_idx += num_proc_channels_;
406 }
407 }
408 }
409
InterleaveTo(AudioFrame * frame,bool data_changed) const410 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
411 assert(proc_samples_per_channel_ == output_samples_per_channel_);
412 assert(num_proc_channels_ == num_input_channels_);
413 assert(frame->num_channels_ == num_proc_channels_);
414 assert(frame->samples_per_channel_ == proc_samples_per_channel_);
415 frame->vad_activity_ = activity_;
416
417 if (!data_changed) {
418 return;
419 }
420
421 int16_t* interleaved = frame->data_;
422 for (int i = 0; i < num_proc_channels_; i++) {
423 int16_t* deinterleaved = channels_->ibuf()->channel(i);
424 int interleaved_idx = i;
425 for (int j = 0; j < proc_samples_per_channel_; j++) {
426 interleaved[interleaved_idx] = deinterleaved[j];
427 interleaved_idx += num_proc_channels_;
428 }
429 }
430 }
431
CopyAndMix(int num_mixed_channels)432 void AudioBuffer::CopyAndMix(int num_mixed_channels) {
433 // We currently only support the stereo to mono case.
434 assert(num_proc_channels_ == 2);
435 assert(num_mixed_channels == 1);
436 if (!mixed_channels_.get()) {
437 mixed_channels_.reset(
438 new ChannelBuffer<int16_t>(proc_samples_per_channel_,
439 num_mixed_channels));
440 }
441
442 StereoToMono(channels_->ibuf()->channel(0),
443 channels_->ibuf()->channel(1),
444 mixed_channels_->channel(0),
445 proc_samples_per_channel_);
446
447 num_mixed_channels_ = num_mixed_channels;
448 }
449
CopyAndMixLowPass(int num_mixed_channels)450 void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
451 // We currently only support the stereo to mono case.
452 assert(num_proc_channels_ == 2);
453 assert(num_mixed_channels == 1);
454 if (!mixed_low_pass_channels_.get()) {
455 mixed_low_pass_channels_.reset(
456 new ChannelBuffer<int16_t>(samples_per_split_channel_,
457 num_mixed_channels));
458 }
459
460 StereoToMono(low_pass_split_data(0),
461 low_pass_split_data(1),
462 mixed_low_pass_channels_->channel(0),
463 samples_per_split_channel_);
464
465 num_mixed_low_pass_channels_ = num_mixed_channels;
466 }
467
CopyLowPassToReference()468 void AudioBuffer::CopyLowPassToReference() {
469 reference_copied_ = true;
470 if (!low_pass_reference_channels_.get()) {
471 low_pass_reference_channels_.reset(
472 new ChannelBuffer<int16_t>(samples_per_split_channel_,
473 num_proc_channels_));
474 }
475 for (int i = 0; i < num_proc_channels_; i++) {
476 low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
477 }
478 }
479
480 } // namespace webrtc
481