1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/audio_processing/audio_buffer.h"
12
13 #include "webrtc/common_audio/include/audio_util.h"
14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
16 #include "webrtc/common_audio/channel_buffer.h"
17 #include "webrtc/modules/audio_processing/common.h"
18
19 namespace webrtc {
20 namespace {
21
// Per-channel frame sizes that this file uses to decide how many frequency
// bands a processing frame has. The 32 kHz and 48 kHz sizes are exact
// multiples of the 16 kHz size.
const size_t kSamplesPer16kHzChannel = 160;
const size_t kSamplesPer32kHzChannel = 2 * kSamplesPer16kHzChannel;
const size_t kSamplesPer48kHzChannel = 3 * kSamplesPer16kHzChannel;
25
KeyboardChannelIndex(const StreamConfig & stream_config)26 int KeyboardChannelIndex(const StreamConfig& stream_config) {
27 if (!stream_config.has_keyboard()) {
28 assert(false);
29 return 0;
30 }
31
32 return stream_config.num_channels();
33 }
34
NumBandsFromSamplesPerChannel(size_t num_frames)35 size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
36 size_t num_bands = 1;
37 if (num_frames == kSamplesPer32kHzChannel ||
38 num_frames == kSamplesPer48kHzChannel) {
39 num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
40 }
41 return num_bands;
42 }
43
44 } // namespace
45
AudioBuffer(size_t input_num_frames,size_t num_input_channels,size_t process_num_frames,size_t num_process_channels,size_t output_num_frames)46 AudioBuffer::AudioBuffer(size_t input_num_frames,
47 size_t num_input_channels,
48 size_t process_num_frames,
49 size_t num_process_channels,
50 size_t output_num_frames)
51 : input_num_frames_(input_num_frames),
52 num_input_channels_(num_input_channels),
53 proc_num_frames_(process_num_frames),
54 num_proc_channels_(num_process_channels),
55 output_num_frames_(output_num_frames),
56 num_channels_(num_process_channels),
57 num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
58 num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
59 mixed_low_pass_valid_(false),
60 reference_copied_(false),
61 activity_(AudioFrame::kVadUnknown),
62 keyboard_data_(NULL),
63 data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {
64 assert(input_num_frames_ > 0);
65 assert(proc_num_frames_ > 0);
66 assert(output_num_frames_ > 0);
67 assert(num_input_channels_ > 0);
68 assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);
69
70 if (input_num_frames_ != proc_num_frames_ ||
71 output_num_frames_ != proc_num_frames_) {
72 // Create an intermediate buffer for resampling.
73 process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,
74 num_proc_channels_));
75
76 if (input_num_frames_ != proc_num_frames_) {
77 for (size_t i = 0; i < num_proc_channels_; ++i) {
78 input_resamplers_.push_back(
79 new PushSincResampler(input_num_frames_,
80 proc_num_frames_));
81 }
82 }
83
84 if (output_num_frames_ != proc_num_frames_) {
85 for (size_t i = 0; i < num_proc_channels_; ++i) {
86 output_resamplers_.push_back(
87 new PushSincResampler(proc_num_frames_,
88 output_num_frames_));
89 }
90 }
91 }
92
93 if (num_bands_ > 1) {
94 split_data_.reset(new IFChannelBuffer(proc_num_frames_,
95 num_proc_channels_,
96 num_bands_));
97 splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
98 num_bands_,
99 proc_num_frames_));
100 }
101 }
102
~AudioBuffer()103 AudioBuffer::~AudioBuffer() {}
104
CopyFrom(const float * const * data,const StreamConfig & stream_config)105 void AudioBuffer::CopyFrom(const float* const* data,
106 const StreamConfig& stream_config) {
107 assert(stream_config.num_frames() == input_num_frames_);
108 assert(stream_config.num_channels() == num_input_channels_);
109 InitForNewData();
110 // Initialized lazily because there's a different condition in
111 // DeinterleaveFrom.
112 const bool need_to_downmix =
113 num_input_channels_ > 1 && num_proc_channels_ == 1;
114 if (need_to_downmix && !input_buffer_) {
115 input_buffer_.reset(
116 new IFChannelBuffer(input_num_frames_, num_proc_channels_));
117 }
118
119 if (stream_config.has_keyboard()) {
120 keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
121 }
122
123 // Downmix.
124 const float* const* data_ptr = data;
125 if (need_to_downmix) {
126 DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
127 input_buffer_->fbuf()->channels()[0]);
128 data_ptr = input_buffer_->fbuf_const()->channels();
129 }
130
131 // Resample.
132 if (input_num_frames_ != proc_num_frames_) {
133 for (size_t i = 0; i < num_proc_channels_; ++i) {
134 input_resamplers_[i]->Resample(data_ptr[i],
135 input_num_frames_,
136 process_buffer_->channels()[i],
137 proc_num_frames_);
138 }
139 data_ptr = process_buffer_->channels();
140 }
141
142 // Convert to the S16 range.
143 for (size_t i = 0; i < num_proc_channels_; ++i) {
144 FloatToFloatS16(data_ptr[i],
145 proc_num_frames_,
146 data_->fbuf()->channels()[i]);
147 }
148 }
149
CopyTo(const StreamConfig & stream_config,float * const * data)150 void AudioBuffer::CopyTo(const StreamConfig& stream_config,
151 float* const* data) {
152 assert(stream_config.num_frames() == output_num_frames_);
153 assert(stream_config.num_channels() == num_channels_ || num_channels_ == 1);
154
155 // Convert to the float range.
156 float* const* data_ptr = data;
157 if (output_num_frames_ != proc_num_frames_) {
158 // Convert to an intermediate buffer for subsequent resampling.
159 data_ptr = process_buffer_->channels();
160 }
161 for (size_t i = 0; i < num_channels_; ++i) {
162 FloatS16ToFloat(data_->fbuf()->channels()[i],
163 proc_num_frames_,
164 data_ptr[i]);
165 }
166
167 // Resample.
168 if (output_num_frames_ != proc_num_frames_) {
169 for (size_t i = 0; i < num_channels_; ++i) {
170 output_resamplers_[i]->Resample(data_ptr[i],
171 proc_num_frames_,
172 data[i],
173 output_num_frames_);
174 }
175 }
176
177 // Upmix.
178 for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) {
179 memcpy(data[i], data[0], output_num_frames_ * sizeof(**data));
180 }
181 }
182
InitForNewData()183 void AudioBuffer::InitForNewData() {
184 keyboard_data_ = NULL;
185 mixed_low_pass_valid_ = false;
186 reference_copied_ = false;
187 activity_ = AudioFrame::kVadUnknown;
188 num_channels_ = num_proc_channels_;
189 }
190
channels_const() const191 const int16_t* const* AudioBuffer::channels_const() const {
192 return data_->ibuf_const()->channels();
193 }
194
channels()195 int16_t* const* AudioBuffer::channels() {
196 mixed_low_pass_valid_ = false;
197 return data_->ibuf()->channels();
198 }
199
split_bands_const(size_t channel) const200 const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const {
201 return split_data_.get() ?
202 split_data_->ibuf_const()->bands(channel) :
203 data_->ibuf_const()->bands(channel);
204 }
205
split_bands(size_t channel)206 int16_t* const* AudioBuffer::split_bands(size_t channel) {
207 mixed_low_pass_valid_ = false;
208 return split_data_.get() ?
209 split_data_->ibuf()->bands(channel) :
210 data_->ibuf()->bands(channel);
211 }
212
split_channels_const(Band band) const213 const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
214 if (split_data_.get()) {
215 return split_data_->ibuf_const()->channels(band);
216 } else {
217 return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
218 }
219 }
220
split_channels(Band band)221 int16_t* const* AudioBuffer::split_channels(Band band) {
222 mixed_low_pass_valid_ = false;
223 if (split_data_.get()) {
224 return split_data_->ibuf()->channels(band);
225 } else {
226 return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
227 }
228 }
229
data()230 ChannelBuffer<int16_t>* AudioBuffer::data() {
231 mixed_low_pass_valid_ = false;
232 return data_->ibuf();
233 }
234
data() const235 const ChannelBuffer<int16_t>* AudioBuffer::data() const {
236 return data_->ibuf_const();
237 }
238
split_data()239 ChannelBuffer<int16_t>* AudioBuffer::split_data() {
240 mixed_low_pass_valid_ = false;
241 return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
242 }
243
split_data() const244 const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
245 return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
246 }
247
channels_const_f() const248 const float* const* AudioBuffer::channels_const_f() const {
249 return data_->fbuf_const()->channels();
250 }
251
channels_f()252 float* const* AudioBuffer::channels_f() {
253 mixed_low_pass_valid_ = false;
254 return data_->fbuf()->channels();
255 }
256
split_bands_const_f(size_t channel) const257 const float* const* AudioBuffer::split_bands_const_f(size_t channel) const {
258 return split_data_.get() ?
259 split_data_->fbuf_const()->bands(channel) :
260 data_->fbuf_const()->bands(channel);
261 }
262
split_bands_f(size_t channel)263 float* const* AudioBuffer::split_bands_f(size_t channel) {
264 mixed_low_pass_valid_ = false;
265 return split_data_.get() ?
266 split_data_->fbuf()->bands(channel) :
267 data_->fbuf()->bands(channel);
268 }
269
split_channels_const_f(Band band) const270 const float* const* AudioBuffer::split_channels_const_f(Band band) const {
271 if (split_data_.get()) {
272 return split_data_->fbuf_const()->channels(band);
273 } else {
274 return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
275 }
276 }
277
split_channels_f(Band band)278 float* const* AudioBuffer::split_channels_f(Band band) {
279 mixed_low_pass_valid_ = false;
280 if (split_data_.get()) {
281 return split_data_->fbuf()->channels(band);
282 } else {
283 return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
284 }
285 }
286
data_f()287 ChannelBuffer<float>* AudioBuffer::data_f() {
288 mixed_low_pass_valid_ = false;
289 return data_->fbuf();
290 }
291
data_f() const292 const ChannelBuffer<float>* AudioBuffer::data_f() const {
293 return data_->fbuf_const();
294 }
295
split_data_f()296 ChannelBuffer<float>* AudioBuffer::split_data_f() {
297 mixed_low_pass_valid_ = false;
298 return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
299 }
300
split_data_f() const301 const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
302 return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
303 }
304
mixed_low_pass_data()305 const int16_t* AudioBuffer::mixed_low_pass_data() {
306 if (num_proc_channels_ == 1) {
307 return split_bands_const(0)[kBand0To8kHz];
308 }
309
310 if (!mixed_low_pass_valid_) {
311 if (!mixed_low_pass_channels_.get()) {
312 mixed_low_pass_channels_.reset(
313 new ChannelBuffer<int16_t>(num_split_frames_, 1));
314 }
315
316 DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
317 num_split_frames_, num_channels_,
318 mixed_low_pass_channels_->channels()[0]);
319 mixed_low_pass_valid_ = true;
320 }
321 return mixed_low_pass_channels_->channels()[0];
322 }
323
low_pass_reference(int channel) const324 const int16_t* AudioBuffer::low_pass_reference(int channel) const {
325 if (!reference_copied_) {
326 return NULL;
327 }
328
329 return low_pass_reference_channels_->channels()[channel];
330 }
331
keyboard_data() const332 const float* AudioBuffer::keyboard_data() const {
333 return keyboard_data_;
334 }
335
set_activity(AudioFrame::VADActivity activity)336 void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
337 activity_ = activity;
338 }
339
activity() const340 AudioFrame::VADActivity AudioBuffer::activity() const {
341 return activity_;
342 }
343
num_channels() const344 size_t AudioBuffer::num_channels() const {
345 return num_channels_;
346 }
347
set_num_channels(size_t num_channels)348 void AudioBuffer::set_num_channels(size_t num_channels) {
349 num_channels_ = num_channels;
350 }
351
num_frames() const352 size_t AudioBuffer::num_frames() const {
353 return proc_num_frames_;
354 }
355
num_frames_per_band() const356 size_t AudioBuffer::num_frames_per_band() const {
357 return num_split_frames_;
358 }
359
num_keyboard_frames() const360 size_t AudioBuffer::num_keyboard_frames() const {
361 // We don't resample the keyboard channel.
362 return input_num_frames_;
363 }
364
num_bands() const365 size_t AudioBuffer::num_bands() const {
366 return num_bands_;
367 }
368
369 // The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
DeinterleaveFrom(AudioFrame * frame)370 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
371 assert(frame->num_channels_ == num_input_channels_);
372 assert(frame->samples_per_channel_ == input_num_frames_);
373 InitForNewData();
374 // Initialized lazily because there's a different condition in CopyFrom.
375 if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
376 input_buffer_.reset(
377 new IFChannelBuffer(input_num_frames_, num_proc_channels_));
378 }
379 activity_ = frame->vad_activity_;
380
381 int16_t* const* deinterleaved;
382 if (input_num_frames_ == proc_num_frames_) {
383 deinterleaved = data_->ibuf()->channels();
384 } else {
385 deinterleaved = input_buffer_->ibuf()->channels();
386 }
387 if (num_proc_channels_ == 1) {
388 // Downmix and deinterleave simultaneously.
389 DownmixInterleavedToMono(frame->data_, input_num_frames_,
390 num_input_channels_, deinterleaved[0]);
391 } else {
392 assert(num_proc_channels_ == num_input_channels_);
393 Deinterleave(frame->data_,
394 input_num_frames_,
395 num_proc_channels_,
396 deinterleaved);
397 }
398
399 // Resample.
400 if (input_num_frames_ != proc_num_frames_) {
401 for (size_t i = 0; i < num_proc_channels_; ++i) {
402 input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i],
403 input_num_frames_,
404 data_->fbuf()->channels()[i],
405 proc_num_frames_);
406 }
407 }
408 }
409
InterleaveTo(AudioFrame * frame,bool data_changed)410 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) {
411 frame->vad_activity_ = activity_;
412 if (!data_changed) {
413 return;
414 }
415
416 assert(frame->num_channels_ == num_channels_ || num_channels_ == 1);
417 assert(frame->samples_per_channel_ == output_num_frames_);
418
419 // Resample if necessary.
420 IFChannelBuffer* data_ptr = data_.get();
421 if (proc_num_frames_ != output_num_frames_) {
422 if (!output_buffer_) {
423 output_buffer_.reset(
424 new IFChannelBuffer(output_num_frames_, num_channels_));
425 }
426 for (size_t i = 0; i < num_channels_; ++i) {
427 output_resamplers_[i]->Resample(
428 data_->fbuf()->channels()[i], proc_num_frames_,
429 output_buffer_->fbuf()->channels()[i], output_num_frames_);
430 }
431 data_ptr = output_buffer_.get();
432 }
433
434 if (frame->num_channels_ == num_channels_) {
435 Interleave(data_ptr->ibuf()->channels(), proc_num_frames_, num_channels_,
436 frame->data_);
437 } else {
438 UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], proc_num_frames_,
439 frame->num_channels_, frame->data_);
440 }
441 }
442
CopyLowPassToReference()443 void AudioBuffer::CopyLowPassToReference() {
444 reference_copied_ = true;
445 if (!low_pass_reference_channels_.get() ||
446 low_pass_reference_channels_->num_channels() != num_channels_) {
447 low_pass_reference_channels_.reset(
448 new ChannelBuffer<int16_t>(num_split_frames_,
449 num_proc_channels_));
450 }
451 for (size_t i = 0; i < num_proc_channels_; i++) {
452 memcpy(low_pass_reference_channels_->channels()[i],
453 split_bands_const(i)[kBand0To8kHz],
454 low_pass_reference_channels_->num_frames_per_band() *
455 sizeof(split_bands_const(i)[kBand0To8kHz][0]));
456 }
457 }
458
SplitIntoFrequencyBands()459 void AudioBuffer::SplitIntoFrequencyBands() {
460 splitting_filter_->Analysis(data_.get(), split_data_.get());
461 }
462
MergeFrequencyBands()463 void AudioBuffer::MergeFrequencyBands() {
464 splitting_filter_->Synthesis(split_data_.get(), data_.get());
465 }
466
467 } // namespace webrtc
468