/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "audio_processing_impl.h"

#include <assert.h>

#include "audio_buffer.h"
#include "critical_section_wrapper.h"
#include "echo_cancellation_impl.h"
#include "echo_control_mobile_impl.h"
#include "file_wrapper.h"
#include "high_pass_filter_impl.h"
#include "gain_control_impl.h"
#include "level_estimator_impl.h"
#include "module_common_types.h"
#include "noise_suppression_impl.h"
#include "processing_component.h"
#include "splitting_filter.h"
#include "voice_detection_impl.h"

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// Files generated at build-time by the protobuf compiler.
#ifdef WEBRTC_ANDROID
#include "external/webrtc/src/modules/audio_processing/debug.pb.h"
#else
#include "webrtc/audio_processing/debug.pb.h"
#endif
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP

namespace webrtc {
AudioProcessing* AudioProcessing::Create(int id) {
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
                 webrtc::kTraceAudioProcessing,
                 id,
                 "AudioProcessing::Create()");*/

  AudioProcessingImpl* apm = new AudioProcessingImpl(id);
  if (apm->Initialize() != kNoError) {
    delete apm;
    apm = NULL;
  }

  return apm;
}
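
// Example (sketch, not part of the original file): typical lifecycle of an
// AudioProcessing instance. The id value is a placeholder.
//
//   AudioProcessing* apm = AudioProcessing::Create(0);
//   if (apm == NULL) {
//     // Creation failed because Initialize() returned an error.
//     return;
//   }
//   // ... configure apm and run the streaming calls below ...
//   AudioProcessing::Destroy(apm);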

void AudioProcessing::Destroy(AudioProcessing* apm) {
  delete static_cast<AudioProcessingImpl*>(apm);
}

AudioProcessingImpl::AudioProcessingImpl(int id)
    : id_(id),
      echo_cancellation_(NULL),
      echo_control_mobile_(NULL),
      gain_control_(NULL),
      high_pass_filter_(NULL),
      level_estimator_(NULL),
      noise_suppression_(NULL),
      voice_detection_(NULL),
      crit_(CriticalSectionWrapper::CreateCriticalSection()),
      render_audio_(NULL),
      capture_audio_(NULL),
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
      debug_file_(FileWrapper::Create()),
      event_msg_(new audioproc::Event()),
#endif
      sample_rate_hz_(kSampleRate16kHz),
      split_sample_rate_hz_(kSampleRate16kHz),
      samples_per_channel_(sample_rate_hz_ / 100),
      stream_delay_ms_(0),
      was_stream_delay_set_(false),
      num_reverse_channels_(1),
      num_input_channels_(1),
      num_output_channels_(1) {

  echo_cancellation_ = new EchoCancellationImpl(this);
  component_list_.push_back(echo_cancellation_);

  echo_control_mobile_ = new EchoControlMobileImpl(this);
  component_list_.push_back(echo_control_mobile_);

  gain_control_ = new GainControlImpl(this);
  component_list_.push_back(gain_control_);

  high_pass_filter_ = new HighPassFilterImpl(this);
  component_list_.push_back(high_pass_filter_);

  level_estimator_ = new LevelEstimatorImpl(this);
  component_list_.push_back(level_estimator_);

  noise_suppression_ = new NoiseSuppressionImpl(this);
  component_list_.push_back(noise_suppression_);

  voice_detection_ = new VoiceDetectionImpl(this);
  component_list_.push_back(voice_detection_);
}

AudioProcessingImpl::~AudioProcessingImpl() {
  while (!component_list_.empty()) {
    ProcessingComponent* component = component_list_.front();
    component->Destroy();
    delete component;
    component_list_.pop_front();
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    debug_file_->CloseFile();
  }
#endif

  delete crit_;
  crit_ = NULL;

  if (render_audio_) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }
}

CriticalSectionWrapper* AudioProcessingImpl::crit() const {
  return crit_;
}

int AudioProcessingImpl::split_sample_rate_hz() const {
  return split_sample_rate_hz_;
}

int AudioProcessingImpl::Initialize() {
  CriticalSectionScoped crit_scoped(*crit_);
  return InitializeLocked();
}

int AudioProcessingImpl::InitializeLocked() {
  if (render_audio_ != NULL) {
    delete render_audio_;
    render_audio_ = NULL;
  }

  if (capture_audio_ != NULL) {
    delete capture_audio_;
    capture_audio_ = NULL;
  }

  render_audio_ = new AudioBuffer(num_reverse_channels_,
                                  samples_per_channel_);
  capture_audio_ = new AudioBuffer(num_input_channels_,
                                   samples_per_channel_);

  was_stream_delay_set_ = false;

  // Initialize all components.
  std::list<ProcessingComponent*>::iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    int err = (*it)->Initialize();
    if (err != kNoError) {
      return err;
    }
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    int err = WriteInitMessage();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  return kNoError;
}

int AudioProcessingImpl::set_sample_rate_hz(int rate) {
  CriticalSectionScoped crit_scoped(*crit_);
  if (rate != kSampleRate8kHz &&
      rate != kSampleRate16kHz &&
      rate != kSampleRate32kHz) {
    return kBadParameterError;
  }

  sample_rate_hz_ = rate;
  samples_per_channel_ = rate / 100;

  if (sample_rate_hz_ == kSampleRate32kHz) {
    split_sample_rate_hz_ = kSampleRate16kHz;
  } else {
    split_sample_rate_hz_ = sample_rate_hz_;
  }

  return InitializeLocked();
}
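
// Example (sketch): the rate and channel setters are normally called before
// streaming starts; each one reinitializes the module through
// InitializeLocked(). Return codes are ignored here for brevity.
//
//   apm->set_sample_rate_hz(32000);    // kSampleRate32kHz
//   apm->set_num_reverse_channels(1);  // far-end (render) channels
//   apm->set_num_channels(1, 1);       // capture input/output channels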

int AudioProcessingImpl::sample_rate_hz() const {
  return sample_rate_hz_;
}

int AudioProcessingImpl::set_num_reverse_channels(int channels) {
  CriticalSectionScoped crit_scoped(*crit_);
  // Only stereo supported currently.
  if (channels > 2 || channels < 1) {
    return kBadParameterError;
  }

  num_reverse_channels_ = channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_reverse_channels() const {
  return num_reverse_channels_;
}

int AudioProcessingImpl::set_num_channels(
    int input_channels,
    int output_channels) {
  CriticalSectionScoped crit_scoped(*crit_);
  if (output_channels > input_channels) {
    return kBadParameterError;
  }

  // Only stereo supported currently.
  if (input_channels > 2 || input_channels < 1) {
    return kBadParameterError;
  }

  if (output_channels > 2 || output_channels < 1) {
    return kBadParameterError;
  }

  num_input_channels_ = input_channels;
  num_output_channels_ = output_channels;

  return InitializeLocked();
}

int AudioProcessingImpl::num_input_channels() const {
  return num_input_channels_;
}

int AudioProcessingImpl::num_output_channels() const {
  return num_output_channels_;
}

int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_input_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_input_data(frame->_payloadData, data_size);
    msg->set_delay(stream_delay_ms_);
    msg->set_drift(echo_cancellation_->stream_drift_samples());
    msg->set_level(gain_control_->stream_analog_level());
  }
#endif

  capture_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): experiment with mixing and AEC placement.
  if (num_output_channels_ < num_input_channels_) {
    capture_audio_->Mix(num_output_channels_);
    frame->_audioChannel = num_output_channels_;
  }

  bool data_changed = stream_data_changed();
  if (analysis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Split into a low and high band.
      SplittingFilterAnalysis(capture_audio_->data(i),
                              capture_audio_->low_pass_split_data(i),
                              capture_audio_->high_pass_split_data(i),
                              capture_audio_->analysis_filter_state1(i),
                              capture_audio_->analysis_filter_state2(i));
    }
  }

  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (echo_control_mobile_->is_enabled() &&
      noise_suppression_->is_enabled()) {
    capture_audio_->CopyLowPassToReference();
  }

  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessCaptureAudio(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  if (synthesis_needed(data_changed)) {
    for (int i = 0; i < num_output_channels_; i++) {
      // Recombine low and high bands.
      SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
                               capture_audio_->high_pass_split_data(i),
                               capture_audio_->data(i),
                               capture_audio_->synthesis_filter_state1(i),
                               capture_audio_->synthesis_filter_state2(i));
    }
  }

  // The level estimator operates on the recombined data.
  err = level_estimator_->ProcessStream(capture_audio_);
  if (err != kNoError) {
    return err;
  }

  capture_audio_->InterleaveTo(frame, data_changed);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    audioproc::Stream* msg = event_msg_->mutable_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_output_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  was_stream_delay_set_ = false;
  return kNoError;
}
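
// Example (sketch): capture-side call pattern for one 10 ms frame, matching
// the checks at the top of ProcessStream(). The delay value is a placeholder
// supplied by the client.
//
//   AudioFrame frame;
//   frame._frequencyInHz = 16000;
//   frame._audioChannel = 1;
//   frame._payloadDataLengthInSamples = 160;  // 10 ms at 16 kHz
//   // ... copy 160 captured samples into frame._payloadData ...
//   apm->set_stream_delay_ms(50);  // render-to-capture delay estimate
//   int err = apm->ProcessStream(&frame);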

int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  CriticalSectionScoped crit_scoped(*crit_);
  int err = kNoError;

  if (frame == NULL) {
    return kNullPointerError;
  }

  if (frame->_frequencyInHz != sample_rate_hz_) {
    return kBadSampleRateError;
  }

  if (frame->_audioChannel != num_reverse_channels_) {
    return kBadNumberChannelsError;
  }

  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  if (debug_file_->Open()) {
    event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
    const size_t data_size = sizeof(int16_t) *
                             frame->_payloadDataLengthInSamples *
                             frame->_audioChannel;
    msg->set_data(frame->_payloadData, data_size);
    err = WriteMessageToDebugFile();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  render_audio_->DeinterleaveFrom(frame);

  // TODO(ajm): turn the splitting filter into a component?
  if (sample_rate_hz_ == kSampleRate32kHz) {
    for (int i = 0; i < num_reverse_channels_; i++) {
      // Split into low and high band.
      SplittingFilterAnalysis(render_audio_->data(i),
                              render_audio_->low_pass_split_data(i),
                              render_audio_->high_pass_split_data(i),
                              render_audio_->analysis_filter_state1(i),
                              render_audio_->analysis_filter_state2(i));
    }
  }

  // TODO(ajm): warnings possible from components?
  err = echo_cancellation_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  err = gain_control_->ProcessRenderAudio(render_audio_);
  if (err != kNoError) {
    return err;
  }

  return err; // TODO(ajm): this is for returning warnings; necessary?
}
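
// Example (sketch): the far-end (render) frame is passed to
// AnalyzeReverseStream() before the matching near-end frame is processed, so
// the echo components have up-to-date render data.
//
//   apm->AnalyzeReverseStream(&render_frame);
//   apm->set_stream_delay_ms(delay_ms);
//   apm->ProcessStream(&capture_frame);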

int AudioProcessingImpl::set_stream_delay_ms(int delay) {
  was_stream_delay_set_ = true;
  if (delay < 0) {
    return kBadParameterError;
  }

  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
  if (delay > 500) {
    stream_delay_ms_ = 500;
    return kBadStreamParameterWarning;
  }

  stream_delay_ms_ = delay;
  return kNoError;
}

int AudioProcessingImpl::stream_delay_ms() const {
  return stream_delay_ms_;
}

bool AudioProcessingImpl::was_stream_delay_set() const {
  return was_stream_delay_set_;
}

int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  CriticalSectionScoped crit_scoped(*crit_);
  assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);

  if (filename == NULL) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_file_->OpenFile(filename, false) == -1) {
    debug_file_->CloseFile();
    return kFileError;
  }

  int err = WriteInitMessage();
  if (err != kNoError) {
    return err;
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
}
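
// Example (sketch): debug recording brackets a processing session. The file
// name is a placeholder; without WEBRTC_AUDIOPROC_DEBUG_DUMP both calls return
// kUnsupportedFunctionError.
//
//   apm->StartDebugRecording("apm_debug.dump");
//   // ... ProcessStream()/AnalyzeReverseStream() as usual ...
//   apm->StopDebugRecording();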

int AudioProcessingImpl::StopDebugRecording() {
  CriticalSectionScoped crit_scoped(*crit_);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // We just return if recording hasn't started.
  if (debug_file_->Open()) {
    if (debug_file_->CloseFile() == -1) {
      return kFileError;
    }
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
}

EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
  return echo_cancellation_;
}

EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
  return echo_control_mobile_;
}

GainControl* AudioProcessingImpl::gain_control() const {
  return gain_control_;
}

HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
  return high_pass_filter_;
}

LevelEstimator* AudioProcessingImpl::level_estimator() const {
  return level_estimator_;
}

NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
  return noise_suppression_;
}

VoiceDetection* AudioProcessingImpl::voice_detection() const {
  return voice_detection_;
}

WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
  CriticalSectionScoped crit_scoped(*crit_);
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
                 webrtc::kTraceAudioProcessing,
                 id_,
                 "ChangeUniqueId(new id = %d)",
                 id);*/
  id_ = id;

  return kNoError;
}

bool AudioProcessingImpl::stream_data_changed() const {
  int enabled_count = 0;
  std::list<ProcessingComponent*>::const_iterator it;
  for (it = component_list_.begin(); it != component_list_.end(); it++) {
    if ((*it)->is_component_enabled()) {
      enabled_count++;
    }
  }

  // Data is unchanged if no components are enabled, or if the only enabled
  // components are level_estimator_ and/or voice_detection_.
  if (enabled_count == 0) {
    return false;
  } else if (enabled_count == 1) {
    if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
      return false;
    }
  } else if (enabled_count == 2) {
    if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
      return false;
    }
  }
  return true;
}

bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const {
  return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz);
}

bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const {
  if (!stream_data_changed && !voice_detection_->is_enabled()) {
    // Only level_estimator_ is enabled.
    return false;
  } else if (sample_rate_hz_ == kSampleRate32kHz) {
    // Something besides level_estimator_ is enabled, and we have super-wb.
    return true;
  }
  return false;
}

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
int AudioProcessingImpl::WriteMessageToDebugFile() {
  int32_t size = event_msg_->ByteSize();
  if (size <= 0) {
    return kUnspecifiedError;
  }
#if defined(WEBRTC_BIG_ENDIAN)
  // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
  // pretty safe in assuming little-endian.
#endif

  if (!event_msg_->SerializeToString(&event_str_)) {
    return kUnspecifiedError;
  }

  // Write message preceded by its size.
  if (!debug_file_->Write(&size, sizeof(int32_t))) {
    return kFileError;
  }
  if (!debug_file_->Write(event_str_.data(), event_str_.length())) {
    return kFileError;
  }

  event_msg_->Clear();

  return 0;
}
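
// Example (sketch): each record written above is an int32_t byte count
// followed by that many bytes of serialized audioproc::Event, so a standalone
// reader could parse the dump roughly as follows (error handling elided):
//
//   int32_t size = 0;
//   while (fread(&size, sizeof(size), 1, file) == 1 && size > 0) {
//     std::string buffer(size, '\0');
//     fread(&buffer[0], 1, static_cast<size_t>(size), file);
//     audioproc::Event event;
//     event.ParseFromString(buffer);
//     // Inspect event.type(), event.stream(), event.reverse_stream(), ...
//   }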

int AudioProcessingImpl::WriteInitMessage() {
  event_msg_->set_type(audioproc::Event::INIT);
  audioproc::Init* msg = event_msg_->mutable_init();
  msg->set_sample_rate(sample_rate_hz_);
  msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz());
  msg->set_num_input_channels(num_input_channels_);
  msg->set_num_output_channels(num_output_channels_);
  msg->set_num_reverse_channels(num_reverse_channels_);

  int err = WriteMessageToDebugFile();
  if (err != kNoError) {
    return err;
  }

  return kNoError;
}
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
} // namespace webrtc