1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "audio_processing_impl.h"
12
13 #include <cassert>
14
15 #include "module_common_types.h"
16
17 #include "critical_section_wrapper.h"
18 #include "file_wrapper.h"
19
20 #include "audio_buffer.h"
21 #include "echo_cancellation_impl.h"
22 #include "echo_control_mobile_impl.h"
23 #include "high_pass_filter_impl.h"
24 #include "gain_control_impl.h"
25 #include "level_estimator_impl.h"
26 #include "noise_suppression_impl.h"
27 #include "processing_component.h"
28 #include "splitting_filter.h"
29 #include "voice_detection_impl.h"
30
31 namespace webrtc {
namespace {

// Record tags used in the debug trace file. Each tag is stored through a
// WebRtc_UWord8 ahead of the data it describes, so the numeric values are
// spelled out explicitly here.
enum Events {
  kInitializeEvent = 0,
  kRenderEvent = 1,
  kCaptureEvent = 2
};

// Text written (followed by a newline) at the very start of a debug
// recording.
const char kMagicNumber[] = "#!vqetrace1.2";

}  // namespace
42
Create(int id)43 AudioProcessing* AudioProcessing::Create(int id) {
44 /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
45 webrtc::kTraceAudioProcessing,
46 id,
47 "AudioProcessing::Create()");*/
48
49 AudioProcessingImpl* apm = new AudioProcessingImpl(id);
50 if (apm->Initialize() != kNoError) {
51 delete apm;
52 apm = NULL;
53 }
54
55 return apm;
56 }
57
Destroy(AudioProcessing * apm)58 void AudioProcessing::Destroy(AudioProcessing* apm) {
59 delete static_cast<AudioProcessingImpl*>(apm);
60 }
61
AudioProcessingImpl(int id)62 AudioProcessingImpl::AudioProcessingImpl(int id)
63 : id_(id),
64 echo_cancellation_(NULL),
65 echo_control_mobile_(NULL),
66 gain_control_(NULL),
67 high_pass_filter_(NULL),
68 level_estimator_(NULL),
69 noise_suppression_(NULL),
70 voice_detection_(NULL),
71 debug_file_(FileWrapper::Create()),
72 crit_(CriticalSectionWrapper::CreateCriticalSection()),
73 render_audio_(NULL),
74 capture_audio_(NULL),
75 sample_rate_hz_(kSampleRate16kHz),
76 split_sample_rate_hz_(kSampleRate16kHz),
77 samples_per_channel_(sample_rate_hz_ / 100),
78 stream_delay_ms_(0),
79 was_stream_delay_set_(false),
80 num_render_input_channels_(1),
81 num_capture_input_channels_(1),
82 num_capture_output_channels_(1) {
83
84 echo_cancellation_ = new EchoCancellationImpl(this);
85 component_list_.push_back(echo_cancellation_);
86
87 echo_control_mobile_ = new EchoControlMobileImpl(this);
88 component_list_.push_back(echo_control_mobile_);
89
90 gain_control_ = new GainControlImpl(this);
91 component_list_.push_back(gain_control_);
92
93 high_pass_filter_ = new HighPassFilterImpl(this);
94 component_list_.push_back(high_pass_filter_);
95
96 level_estimator_ = new LevelEstimatorImpl(this);
97 component_list_.push_back(level_estimator_);
98
99 noise_suppression_ = new NoiseSuppressionImpl(this);
100 component_list_.push_back(noise_suppression_);
101
102 voice_detection_ = new VoiceDetectionImpl(this);
103 component_list_.push_back(voice_detection_);
104 }
105
~AudioProcessingImpl()106 AudioProcessingImpl::~AudioProcessingImpl() {
107 while (!component_list_.empty()) {
108 ProcessingComponent* component = component_list_.front();
109 component->Destroy();
110 delete component;
111 component_list_.pop_front();
112 }
113
114 if (debug_file_->Open()) {
115 debug_file_->CloseFile();
116 }
117 delete debug_file_;
118 debug_file_ = NULL;
119
120 delete crit_;
121 crit_ = NULL;
122
123 if (render_audio_ != NULL) {
124 delete render_audio_;
125 render_audio_ = NULL;
126 }
127
128 if (capture_audio_ != NULL) {
129 delete capture_audio_;
130 capture_audio_ = NULL;
131 }
132 }
133
crit() const134 CriticalSectionWrapper* AudioProcessingImpl::crit() const {
135 return crit_;
136 }
137
split_sample_rate_hz() const138 int AudioProcessingImpl::split_sample_rate_hz() const {
139 return split_sample_rate_hz_;
140 }
141
Initialize()142 int AudioProcessingImpl::Initialize() {
143 CriticalSectionScoped crit_scoped(*crit_);
144 return InitializeLocked();
145 }
146
InitializeLocked()147 int AudioProcessingImpl::InitializeLocked() {
148 if (render_audio_ != NULL) {
149 delete render_audio_;
150 render_audio_ = NULL;
151 }
152
153 if (capture_audio_ != NULL) {
154 delete capture_audio_;
155 capture_audio_ = NULL;
156 }
157
158 render_audio_ = new AudioBuffer(num_render_input_channels_,
159 samples_per_channel_);
160 capture_audio_ = new AudioBuffer(num_capture_input_channels_,
161 samples_per_channel_);
162
163 was_stream_delay_set_ = false;
164
165 // Initialize all components.
166 std::list<ProcessingComponent*>::iterator it;
167 for (it = component_list_.begin(); it != component_list_.end(); it++) {
168 int err = (*it)->Initialize();
169 if (err != kNoError) {
170 return err;
171 }
172 }
173
174 return kNoError;
175 }
176
set_sample_rate_hz(int rate)177 int AudioProcessingImpl::set_sample_rate_hz(int rate) {
178 CriticalSectionScoped crit_scoped(*crit_);
179 if (rate != kSampleRate8kHz &&
180 rate != kSampleRate16kHz &&
181 rate != kSampleRate32kHz) {
182 return kBadParameterError;
183 }
184
185 sample_rate_hz_ = rate;
186 samples_per_channel_ = rate / 100;
187
188 if (sample_rate_hz_ == kSampleRate32kHz) {
189 split_sample_rate_hz_ = kSampleRate16kHz;
190 } else {
191 split_sample_rate_hz_ = sample_rate_hz_;
192 }
193
194 return InitializeLocked();
195 }
196
sample_rate_hz() const197 int AudioProcessingImpl::sample_rate_hz() const {
198 return sample_rate_hz_;
199 }
200
set_num_reverse_channels(int channels)201 int AudioProcessingImpl::set_num_reverse_channels(int channels) {
202 CriticalSectionScoped crit_scoped(*crit_);
203 // Only stereo supported currently.
204 if (channels > 2 || channels < 1) {
205 return kBadParameterError;
206 }
207
208 num_render_input_channels_ = channels;
209
210 return InitializeLocked();
211 }
212
num_reverse_channels() const213 int AudioProcessingImpl::num_reverse_channels() const {
214 return num_render_input_channels_;
215 }
216
set_num_channels(int input_channels,int output_channels)217 int AudioProcessingImpl::set_num_channels(
218 int input_channels,
219 int output_channels) {
220 CriticalSectionScoped crit_scoped(*crit_);
221 if (output_channels > input_channels) {
222 return kBadParameterError;
223 }
224
225 // Only stereo supported currently.
226 if (input_channels > 2 || input_channels < 1) {
227 return kBadParameterError;
228 }
229
230 if (output_channels > 2 || output_channels < 1) {
231 return kBadParameterError;
232 }
233
234 num_capture_input_channels_ = input_channels;
235 num_capture_output_channels_ = output_channels;
236
237 return InitializeLocked();
238 }
239
num_input_channels() const240 int AudioProcessingImpl::num_input_channels() const {
241 return num_capture_input_channels_;
242 }
243
num_output_channels() const244 int AudioProcessingImpl::num_output_channels() const {
245 return num_capture_output_channels_;
246 }
247
ProcessStream(AudioFrame * frame)248 int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
249 CriticalSectionScoped crit_scoped(*crit_);
250 int err = kNoError;
251
252 if (frame == NULL) {
253 return kNullPointerError;
254 }
255
256 if (frame->_frequencyInHz !=
257 static_cast<WebRtc_UWord32>(sample_rate_hz_)) {
258 return kBadSampleRateError;
259 }
260
261 if (frame->_audioChannel != num_capture_input_channels_) {
262 return kBadNumberChannelsError;
263 }
264
265 if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
266 return kBadDataLengthError;
267 }
268
269 if (debug_file_->Open()) {
270 WebRtc_UWord8 event = kCaptureEvent;
271 if (!debug_file_->Write(&event, sizeof(event))) {
272 return kFileError;
273 }
274
275 if (!debug_file_->Write(&frame->_frequencyInHz,
276 sizeof(frame->_frequencyInHz))) {
277 return kFileError;
278 }
279
280 if (!debug_file_->Write(&frame->_audioChannel,
281 sizeof(frame->_audioChannel))) {
282 return kFileError;
283 }
284
285 if (!debug_file_->Write(&frame->_payloadDataLengthInSamples,
286 sizeof(frame->_payloadDataLengthInSamples))) {
287 return kFileError;
288 }
289
290 if (!debug_file_->Write(frame->_payloadData,
291 sizeof(WebRtc_Word16) * frame->_payloadDataLengthInSamples *
292 frame->_audioChannel)) {
293 return kFileError;
294 }
295 }
296
297 capture_audio_->DeinterleaveFrom(frame);
298
299 // TODO(ajm): experiment with mixing and AEC placement.
300 if (num_capture_output_channels_ < num_capture_input_channels_) {
301 capture_audio_->Mix(num_capture_output_channels_);
302
303 frame->_audioChannel = num_capture_output_channels_;
304 }
305
306 if (sample_rate_hz_ == kSampleRate32kHz) {
307 for (int i = 0; i < num_capture_input_channels_; i++) {
308 // Split into a low and high band.
309 SplittingFilterAnalysis(capture_audio_->data(i),
310 capture_audio_->low_pass_split_data(i),
311 capture_audio_->high_pass_split_data(i),
312 capture_audio_->analysis_filter_state1(i),
313 capture_audio_->analysis_filter_state2(i));
314 }
315 }
316
317 err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
318 if (err != kNoError) {
319 return err;
320 }
321
322 err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
323 if (err != kNoError) {
324 return err;
325 }
326
327 err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
328 if (err != kNoError) {
329 return err;
330 }
331
332 if (echo_control_mobile_->is_enabled() &&
333 noise_suppression_->is_enabled()) {
334 capture_audio_->CopyLowPassToReference();
335 }
336
337 err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
338 if (err != kNoError) {
339 return err;
340 }
341
342 err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
343 if (err != kNoError) {
344 return err;
345 }
346
347 err = voice_detection_->ProcessCaptureAudio(capture_audio_);
348 if (err != kNoError) {
349 return err;
350 }
351
352 err = gain_control_->ProcessCaptureAudio(capture_audio_);
353 if (err != kNoError) {
354 return err;
355 }
356
357 //err = level_estimator_->ProcessCaptureAudio(capture_audio_);
358 //if (err != kNoError) {
359 // return err;
360 //}
361
362 if (sample_rate_hz_ == kSampleRate32kHz) {
363 for (int i = 0; i < num_capture_output_channels_; i++) {
364 // Recombine low and high bands.
365 SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
366 capture_audio_->high_pass_split_data(i),
367 capture_audio_->data(i),
368 capture_audio_->synthesis_filter_state1(i),
369 capture_audio_->synthesis_filter_state2(i));
370 }
371 }
372
373 capture_audio_->InterleaveTo(frame);
374
375 return kNoError;
376 }
377
AnalyzeReverseStream(AudioFrame * frame)378 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
379 CriticalSectionScoped crit_scoped(*crit_);
380 int err = kNoError;
381
382 if (frame == NULL) {
383 return kNullPointerError;
384 }
385
386 if (frame->_frequencyInHz !=
387 static_cast<WebRtc_UWord32>(sample_rate_hz_)) {
388 return kBadSampleRateError;
389 }
390
391 if (frame->_audioChannel != num_render_input_channels_) {
392 return kBadNumberChannelsError;
393 }
394
395 if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
396 return kBadDataLengthError;
397 }
398
399 if (debug_file_->Open()) {
400 WebRtc_UWord8 event = kRenderEvent;
401 if (!debug_file_->Write(&event, sizeof(event))) {
402 return kFileError;
403 }
404
405 if (!debug_file_->Write(&frame->_frequencyInHz,
406 sizeof(frame->_frequencyInHz))) {
407 return kFileError;
408 }
409
410 if (!debug_file_->Write(&frame->_audioChannel,
411 sizeof(frame->_audioChannel))) {
412 return kFileError;
413 }
414
415 if (!debug_file_->Write(&frame->_payloadDataLengthInSamples,
416 sizeof(frame->_payloadDataLengthInSamples))) {
417 return kFileError;
418 }
419
420 if (!debug_file_->Write(frame->_payloadData,
421 sizeof(WebRtc_Word16) * frame->_payloadDataLengthInSamples *
422 frame->_audioChannel)) {
423 return kFileError;
424 }
425 }
426
427 render_audio_->DeinterleaveFrom(frame);
428
429 // TODO(ajm): turn the splitting filter into a component?
430 if (sample_rate_hz_ == kSampleRate32kHz) {
431 for (int i = 0; i < num_render_input_channels_; i++) {
432 // Split into low and high band.
433 SplittingFilterAnalysis(render_audio_->data(i),
434 render_audio_->low_pass_split_data(i),
435 render_audio_->high_pass_split_data(i),
436 render_audio_->analysis_filter_state1(i),
437 render_audio_->analysis_filter_state2(i));
438 }
439 }
440
441 // TODO(ajm): warnings possible from components?
442 err = echo_cancellation_->ProcessRenderAudio(render_audio_);
443 if (err != kNoError) {
444 return err;
445 }
446
447 err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
448 if (err != kNoError) {
449 return err;
450 }
451
452 err = gain_control_->ProcessRenderAudio(render_audio_);
453 if (err != kNoError) {
454 return err;
455 }
456
457 //err = level_estimator_->AnalyzeReverseStream(render_audio_);
458 //if (err != kNoError) {
459 // return err;
460 //}
461
462 was_stream_delay_set_ = false;
463 return err; // TODO(ajm): this is for returning warnings; necessary?
464 }
465
set_stream_delay_ms(int delay)466 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
467 was_stream_delay_set_ = true;
468 if (delay < 0) {
469 return kBadParameterError;
470 }
471
472 // TODO(ajm): the max is rather arbitrarily chosen; investigate.
473 if (delay > 500) {
474 stream_delay_ms_ = 500;
475 return kBadStreamParameterWarning;
476 }
477
478 stream_delay_ms_ = delay;
479 return kNoError;
480 }
481
stream_delay_ms() const482 int AudioProcessingImpl::stream_delay_ms() const {
483 return stream_delay_ms_;
484 }
485
was_stream_delay_set() const486 bool AudioProcessingImpl::was_stream_delay_set() const {
487 return was_stream_delay_set_;
488 }
489
StartDebugRecording(const char filename[AudioProcessing::kMaxFilenameSize])490 int AudioProcessingImpl::StartDebugRecording(
491 const char filename[AudioProcessing::kMaxFilenameSize]) {
492 CriticalSectionScoped crit_scoped(*crit_);
493 assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);
494
495 if (filename == NULL) {
496 return kNullPointerError;
497 }
498
499 // Stop any ongoing recording.
500 if (debug_file_->Open()) {
501 if (debug_file_->CloseFile() == -1) {
502 return kFileError;
503 }
504 }
505
506 if (debug_file_->OpenFile(filename, false) == -1) {
507 debug_file_->CloseFile();
508 return kFileError;
509 }
510
511 if (debug_file_->WriteText("%s\n", kMagicNumber) == -1) {
512 debug_file_->CloseFile();
513 return kFileError;
514 }
515
516 // TODO(ajm): should we do this? If so, we need the number of channels etc.
517 // Record the default sample rate.
518 WebRtc_UWord8 event = kInitializeEvent;
519 if (!debug_file_->Write(&event, sizeof(event))) {
520 return kFileError;
521 }
522
523 if (!debug_file_->Write(&sample_rate_hz_, sizeof(sample_rate_hz_))) {
524 return kFileError;
525 }
526
527 return kNoError;
528 }
529
StopDebugRecording()530 int AudioProcessingImpl::StopDebugRecording() {
531 CriticalSectionScoped crit_scoped(*crit_);
532 // We just return if recording hasn't started.
533 if (debug_file_->Open()) {
534 if (debug_file_->CloseFile() == -1) {
535 return kFileError;
536 }
537 }
538
539 return kNoError;
540 }
541
echo_cancellation() const542 EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
543 return echo_cancellation_;
544 }
545
echo_control_mobile() const546 EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
547 return echo_control_mobile_;
548 }
549
gain_control() const550 GainControl* AudioProcessingImpl::gain_control() const {
551 return gain_control_;
552 }
553
high_pass_filter() const554 HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
555 return high_pass_filter_;
556 }
557
level_estimator() const558 LevelEstimator* AudioProcessingImpl::level_estimator() const {
559 return level_estimator_;
560 }
561
noise_suppression() const562 NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
563 return noise_suppression_;
564 }
565
voice_detection() const566 VoiceDetection* AudioProcessingImpl::voice_detection() const {
567 return voice_detection_;
568 }
569
Version(WebRtc_Word8 * version,WebRtc_UWord32 & bytes_remaining,WebRtc_UWord32 & position) const570 WebRtc_Word32 AudioProcessingImpl::Version(WebRtc_Word8* version,
571 WebRtc_UWord32& bytes_remaining, WebRtc_UWord32& position) const {
572 if (version == NULL) {
573 /*WEBRTC_TRACE(webrtc::kTraceError,
574 webrtc::kTraceAudioProcessing,
575 -1,
576 "Null version pointer");*/
577 return kNullPointerError;
578 }
579 memset(&version[position], 0, bytes_remaining);
580
581 WebRtc_Word8 my_version[] = "AudioProcessing 1.0.0";
582 // Includes null termination.
583 WebRtc_UWord32 length = static_cast<WebRtc_UWord32>(strlen(my_version));
584 if (bytes_remaining < length) {
585 /*WEBRTC_TRACE(webrtc::kTraceError,
586 webrtc::kTraceAudioProcessing,
587 -1,
588 "Buffer of insufficient length");*/
589 return kBadParameterError;
590 }
591 memcpy(&version[position], my_version, length);
592 bytes_remaining -= length;
593 position += length;
594
595 std::list<ProcessingComponent*>::const_iterator it;
596 for (it = component_list_.begin(); it != component_list_.end(); it++) {
597 char component_version[256];
598 strcpy(component_version, "\n");
599 int err = (*it)->get_version(&component_version[1],
600 sizeof(component_version) - 1);
601 if (err != kNoError) {
602 return err;
603 }
604 if (strncmp(&component_version[1], "\0", 1) == 0) {
605 // Assume empty if first byte is NULL.
606 continue;
607 }
608
609 length = static_cast<WebRtc_UWord32>(strlen(component_version));
610 if (bytes_remaining < length) {
611 /*WEBRTC_TRACE(webrtc::kTraceError,
612 webrtc::kTraceAudioProcessing,
613 -1,
614 "Buffer of insufficient length");*/
615 return kBadParameterError;
616 }
617 memcpy(&version[position], component_version, length);
618 bytes_remaining -= length;
619 position += length;
620 }
621
622 return kNoError;
623 }
624
// Replaces the identifier stored for this instance with |id|, under the
// instance lock. Always returns kNoError.
WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
  CriticalSectionScoped lock(*crit_);
  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
             webrtc::kTraceAudioProcessing,
             id_,
             "ChangeUniqueId(new id = %d)",
             id);*/
  id_ = id;
  return kNoError;
}
636 } // namespace webrtc
637