1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/audio_processing/audio_processing_impl.h"
12
13 #include <assert.h>
14 #include <algorithm>
15
16 #include "webrtc/base/checks.h"
17 #include "webrtc/base/platform_file.h"
18 #include "webrtc/base/trace_event.h"
19 #include "webrtc/common_audio/audio_converter.h"
20 #include "webrtc/common_audio/channel_buffer.h"
21 #include "webrtc/common_audio/include/audio_util.h"
22 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
23 extern "C" {
24 #include "webrtc/modules/audio_processing/aec/aec_core.h"
25 }
26 #include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
27 #include "webrtc/modules/audio_processing/audio_buffer.h"
28 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
29 #include "webrtc/modules/audio_processing/common.h"
30 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
31 #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
32 #include "webrtc/modules/audio_processing/gain_control_impl.h"
33 #include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
34 #include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
35 #include "webrtc/modules/audio_processing/level_estimator_impl.h"
36 #include "webrtc/modules/audio_processing/noise_suppression_impl.h"
37 #include "webrtc/modules/audio_processing/processing_component.h"
38 #include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
39 #include "webrtc/modules/audio_processing/voice_detection_impl.h"
40 #include "webrtc/modules/include/module_common_types.h"
41 #include "webrtc/system_wrappers/include/file_wrapper.h"
42 #include "webrtc/system_wrappers/include/logging.h"
43 #include "webrtc/system_wrappers/include/metrics.h"
44
45 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
46 // Files generated at build-time by the protobuf compiler.
47 #ifdef WEBRTC_ANDROID_PLATFORM_BUILD
48 #include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"
49 #else
50 #include "webrtc/audio_processing/debug.pb.h"
51 #endif
52 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
53
54 #define RETURN_ON_ERR(expr) \
55 do { \
56 int err = (expr); \
57 if (err != kNoError) { \
58 return err; \
59 } \
60 } while (0)
61
62 namespace webrtc {
63 namespace {
64
LayoutHasKeyboard(AudioProcessing::ChannelLayout layout)65 static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
66 switch (layout) {
67 case AudioProcessing::kMono:
68 case AudioProcessing::kStereo:
69 return false;
70 case AudioProcessing::kMonoAndKeyboard:
71 case AudioProcessing::kStereoAndKeyboard:
72 return true;
73 }
74
75 assert(false);
76 return false;
77 }
78 } // namespace
79
80 // Throughout webrtc, it's assumed that success is represented by zero.
81 static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
82
83 // This class has two main functionalities:
84 //
85 // 1) It is returned instead of the real GainControl after the new AGC has been
86 // enabled in order to prevent an outside user from overriding compression
87 // settings. It doesn't do anything in its implementation, except for
88 // delegating the const methods and Enable calls to the real GainControl, so
89 // AGC can still be disabled.
90 //
91 // 2) It is injected into AgcManagerDirect and implements volume callbacks for
92 // getting and setting the volume level. It just caches this value to be used
93 // in VoiceEngine later.
class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
 public:
  explicit GainControlForNewAgc(GainControlImpl* gain_control)
      : real_gain_control_(gain_control), volume_(0) {}

  // GainControl implementation.
  // Enable/is_enabled are forwarded so the outside user can still turn AGC
  // on and off; configuration setters below are deliberate no-ops so the new
  // AGC's compression settings cannot be overridden from outside.
  int Enable(bool enable) override {
    return real_gain_control_->Enable(enable);
  }
  bool is_enabled() const override { return real_gain_control_->is_enabled(); }
  // Caches the analog level instead of forwarding it; the cached value is
  // served back through stream_analog_level()/GetMicVolume().
  int set_stream_analog_level(int level) override {
    volume_ = level;
    return AudioProcessing::kNoError;
  }
  int stream_analog_level() override { return volume_; }
  // No-op: mode is fixed to adaptive-analog while the new AGC is active.
  int set_mode(Mode mode) override { return AudioProcessing::kNoError; }
  Mode mode() const override { return GainControl::kAdaptiveAnalog; }
  // No-op setter; the real target level is still readable below.
  int set_target_level_dbfs(int level) override {
    return AudioProcessing::kNoError;
  }
  int target_level_dbfs() const override {
    return real_gain_control_->target_level_dbfs();
  }
  // No-op setter; the real compression gain is still readable below.
  int set_compression_gain_db(int gain) override {
    return AudioProcessing::kNoError;
  }
  int compression_gain_db() const override {
    return real_gain_control_->compression_gain_db();
  }
  // No-op setter; limiter state remains controlled by the new AGC.
  int enable_limiter(bool enable) override { return AudioProcessing::kNoError; }
  bool is_limiter_enabled() const override {
    return real_gain_control_->is_limiter_enabled();
  }
  // No-op setter; the real limits are still readable below.
  int set_analog_level_limits(int minimum, int maximum) override {
    return AudioProcessing::kNoError;
  }
  int analog_level_minimum() const override {
    return real_gain_control_->analog_level_minimum();
  }
  int analog_level_maximum() const override {
    return real_gain_control_->analog_level_maximum();
  }
  bool stream_is_saturated() const override {
    return real_gain_control_->stream_is_saturated();
  }

  // VolumeCallbacks implementation. AgcManagerDirect reads/writes the mic
  // volume through these; the value is only cached here for VoiceEngine.
  void SetMicVolume(int volume) override { volume_ = volume; }
  int GetMicVolume() override { return volume_; }

 private:
  GainControl* real_gain_control_;  // Not owned.
  int volume_;                      // Last analog mic level seen.
};
148
// Submodules that are exposed through the public AudioProcessing interface
// (via the corresponding accessor methods).
struct AudioProcessingImpl::ApmPublicSubmodules {
  ApmPublicSubmodules()
      : echo_cancellation(nullptr),
        echo_control_mobile(nullptr),
        gain_control(nullptr) {}
  // Accessed externally of APM without any lock acquired.
  // These three are raw pointers because they are also registered in the
  // ProcessingComponent list, which owns and destroys them (see ~AudioProcessingImpl).
  EchoCancellationImpl* echo_cancellation;
  EchoControlMobileImpl* echo_control_mobile;
  GainControlImpl* gain_control;
  rtc::scoped_ptr<HighPassFilterImpl> high_pass_filter;
  rtc::scoped_ptr<LevelEstimatorImpl> level_estimator;
  rtc::scoped_ptr<NoiseSuppressionImpl> noise_suppression;
  rtc::scoped_ptr<VoiceDetectionImpl> voice_detection;
  rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc;

  // Accessed internally from both render and capture.
  rtc::scoped_ptr<TransientSuppressor> transient_suppressor;
  rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer;
};
168
// Submodules that are used only internally by APM.
struct AudioProcessingImpl::ApmPrivateSubmodules {
  // Takes ownership of |beamformer| (may be null; a default beamformer is
  // then created lazily in InitializeBeamformer()).
  explicit ApmPrivateSubmodules(Beamformer<float>* beamformer)
      : beamformer(beamformer) {}
  // Accessed internally from capture or during initialization
  std::list<ProcessingComponent*> component_list;
  rtc::scoped_ptr<Beamformer<float>> beamformer;
  rtc::scoped_ptr<AgcManagerDirect> agc_manager;
};
177
// Native processing rates, in ascending order. Rate selection in
// InitializeLocked() relies on this ordering (it picks the first native rate
// >= the requested rate).
const int AudioProcessing::kNativeSampleRatesHz[] = {
    AudioProcessing::kSampleRate8kHz,
    AudioProcessing::kSampleRate16kHz,
    AudioProcessing::kSampleRate32kHz,
    AudioProcessing::kSampleRate48kHz};
const size_t AudioProcessing::kNumNativeSampleRates =
    arraysize(AudioProcessing::kNativeSampleRatesHz);
// Highest native rate == last element of the ascending array above.
const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
    kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
// AECM only supports rates up to 16 kHz (see ProcessStream checks).
const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz;
188
Create()189 AudioProcessing* AudioProcessing::Create() {
190 Config config;
191 return Create(config, nullptr);
192 }
193
// Creates an APM with the given configuration and no beamformer.
AudioProcessing* AudioProcessing::Create(const Config& config) {
  return Create(config, nullptr);
}
197
Create(const Config & config,Beamformer<float> * beamformer)198 AudioProcessing* AudioProcessing::Create(const Config& config,
199 Beamformer<float>* beamformer) {
200 AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer);
201 if (apm->Initialize() != kNoError) {
202 delete apm;
203 apm = nullptr;
204 }
205
206 return apm;
207 }
208
// Delegates to the two-argument constructor with no beamformer injected.
AudioProcessingImpl::AudioProcessingImpl(const Config& config)
    : AudioProcessingImpl(config, nullptr) {}
211
AudioProcessingImpl::AudioProcessingImpl(const Config& config,
                                         Beamformer<float>* beamformer)
    : public_submodules_(new ApmPublicSubmodules()),
      private_submodules_(new ApmPrivateSubmodules(beamformer)),
      constants_(config.Get<ExperimentalAgc>().startup_min_volume,
// The experimental AGC is force-disabled on mobile platforms.
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
                 false,
#else
                 config.Get<ExperimentalAgc>().enabled,
#endif
                 config.Get<Intelligibility>().enabled),

// The experimental (transient-suppression) NS is force-disabled on mobile.
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
      capture_(false,
#else
      capture_(config.Get<ExperimentalNs>().enabled,
#endif
               config.Get<Beamforming>().array_geometry,
               config.Get<Beamforming>().target_direction),
      capture_nonlocked_(config.Get<Beamforming>().enabled)
{
  {
    // Hold both locks while wiring up submodules so construction is
    // effectively single-threaded.
    rtc::CritScope cs_render(&crit_render_);
    rtc::CritScope cs_capture(&crit_capture_);

    public_submodules_->echo_cancellation =
        new EchoCancellationImpl(this, &crit_render_, &crit_capture_);
    public_submodules_->echo_control_mobile =
        new EchoControlMobileImpl(this, &crit_render_, &crit_capture_);
    // NOTE(review): gain control receives &crit_capture_ for BOTH its render
    // and capture lock (unlike AEC/AECM above) — its render-side data is only
    // touched under the capture lock via ReadQueuedRenderData(); confirm this
    // asymmetry is intentional.
    public_submodules_->gain_control =
        new GainControlImpl(this, &crit_capture_, &crit_capture_);
    public_submodules_->high_pass_filter.reset(
        new HighPassFilterImpl(&crit_capture_));
    public_submodules_->level_estimator.reset(
        new LevelEstimatorImpl(&crit_capture_));
    public_submodules_->noise_suppression.reset(
        new NoiseSuppressionImpl(&crit_capture_));
    public_submodules_->voice_detection.reset(
        new VoiceDetectionImpl(&crit_capture_));
    public_submodules_->gain_control_for_new_agc.reset(
        new GainControlForNewAgc(public_submodules_->gain_control));

    // The component list takes over ownership/teardown of the three raw
    // pointers above (see the destructor).
    private_submodules_->component_list.push_back(
        public_submodules_->echo_cancellation);
    private_submodules_->component_list.push_back(
        public_submodules_->echo_control_mobile);
    private_submodules_->component_list.push_back(
        public_submodules_->gain_control);
  }

  SetExtraOptions(config);
}
264
// Teardown order matters: dependents are destroyed before the objects they
// reference.
AudioProcessingImpl::~AudioProcessingImpl() {
  // Depends on gain_control_ and
  // public_submodules_->gain_control_for_new_agc.
  private_submodules_->agc_manager.reset();
  // Depends on gain_control_.
  public_submodules_->gain_control_for_new_agc.reset();
  // The component list owns the raw ProcessingComponent pointers created in
  // the constructor; destroy and free each one.
  while (!private_submodules_->component_list.empty()) {
    ProcessingComponent* component =
        private_submodules_->component_list.front();
    component->Destroy();
    delete component;
    private_submodules_->component_list.pop_front();
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Close any debug-dump file still open so the recording is flushed.
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.debug_file->CloseFile();
  }
#endif
}
285
Initialize()286 int AudioProcessingImpl::Initialize() {
287 // Run in a single-threaded manner during initialization.
288 rtc::CritScope cs_render(&crit_render_);
289 rtc::CritScope cs_capture(&crit_capture_);
290 return InitializeLocked();
291 }
292
Initialize(int input_sample_rate_hz,int output_sample_rate_hz,int reverse_sample_rate_hz,ChannelLayout input_layout,ChannelLayout output_layout,ChannelLayout reverse_layout)293 int AudioProcessingImpl::Initialize(int input_sample_rate_hz,
294 int output_sample_rate_hz,
295 int reverse_sample_rate_hz,
296 ChannelLayout input_layout,
297 ChannelLayout output_layout,
298 ChannelLayout reverse_layout) {
299 const ProcessingConfig processing_config = {
300 {{input_sample_rate_hz,
301 ChannelsFromLayout(input_layout),
302 LayoutHasKeyboard(input_layout)},
303 {output_sample_rate_hz,
304 ChannelsFromLayout(output_layout),
305 LayoutHasKeyboard(output_layout)},
306 {reverse_sample_rate_hz,
307 ChannelsFromLayout(reverse_layout),
308 LayoutHasKeyboard(reverse_layout)},
309 {reverse_sample_rate_hz,
310 ChannelsFromLayout(reverse_layout),
311 LayoutHasKeyboard(reverse_layout)}}};
312
313 return Initialize(processing_config);
314 }
315
Initialize(const ProcessingConfig & processing_config)316 int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
317 // Run in a single-threaded manner during initialization.
318 rtc::CritScope cs_render(&crit_render_);
319 rtc::CritScope cs_capture(&crit_capture_);
320 return InitializeLocked(processing_config);
321 }
322
// Render-side entry to the conditional reinitialization; the caller is
// expected to hold crit_render_ (see MaybeInitialize()).
int AudioProcessingImpl::MaybeInitializeRender(
    const ProcessingConfig& processing_config) {
  return MaybeInitialize(processing_config);
}
327
// Capture-side entry to the conditional reinitialization; the caller is
// expected to hold crit_render_ (see MaybeInitialize()).
int AudioProcessingImpl::MaybeInitializeCapture(
    const ProcessingConfig& processing_config) {
  return MaybeInitialize(processing_config);
}
332
// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values (needs to be called while holding the crit_render_lock).
int AudioProcessingImpl::MaybeInitialize(
    const ProcessingConfig& processing_config) {
  // Called from both threads. Thread check is therefore not possible.
  // The comparison is done without crit_capture_; formats_.api_format is
  // only written under both locks, and the caller holds crit_render_.
  if (processing_config == formats_.api_format) {
    return kNoError;
  }

  // Format changed: also take the capture lock so both locks are held for
  // the actual reinitialization.
  rtc::CritScope cs_capture(&crit_capture_);
  return InitializeLocked(processing_config);
}
345
// Allocates the render/capture audio buffers and (re)initializes every
// submodule for the current formats_. Must be called with both locks held.
int AudioProcessingImpl::InitializeLocked() {
  // With beamforming, the forward buffer keeps all input channels (the
  // beamformer mixes down later); otherwise it is sized for the output.
  const int fwd_audio_buffer_channels =
      capture_nonlocked_.beamformer_enabled
          ? formats_.api_format.input_stream().num_channels()
          : formats_.api_format.output_stream().num_channels();
  // A reverse output stream of 0 frames means "no reverse output requested";
  // fall back to the internal reverse processing frame count.
  const int rev_audio_buffer_out_num_frames =
      formats_.api_format.reverse_output_stream().num_frames() == 0
          ? formats_.rev_proc_format.num_frames()
          : formats_.api_format.reverse_output_stream().num_frames();
  if (formats_.api_format.reverse_input_stream().num_channels() > 0) {
    render_.render_audio.reset(new AudioBuffer(
        formats_.api_format.reverse_input_stream().num_frames(),
        formats_.api_format.reverse_input_stream().num_channels(),
        formats_.rev_proc_format.num_frames(),
        formats_.rev_proc_format.num_channels(),
        rev_audio_buffer_out_num_frames));
    if (rev_conversion_needed()) {
      render_.render_converter = AudioConverter::Create(
          formats_.api_format.reverse_input_stream().num_channels(),
          formats_.api_format.reverse_input_stream().num_frames(),
          formats_.api_format.reverse_output_stream().num_channels(),
          formats_.api_format.reverse_output_stream().num_frames());
    } else {
      render_.render_converter.reset(nullptr);
    }
  } else {
    // No reverse stream configured: release any previous buffers.
    render_.render_audio.reset(nullptr);
    render_.render_converter.reset(nullptr);
  }
  capture_.capture_audio.reset(
      new AudioBuffer(formats_.api_format.input_stream().num_frames(),
                      formats_.api_format.input_stream().num_channels(),
                      capture_nonlocked_.fwd_proc_format.num_frames(),
                      fwd_audio_buffer_channels,
                      formats_.api_format.output_stream().num_frames()));

  // Initialize all components.
  for (auto item : private_submodules_->component_list) {
    int err = item->Initialize();
    if (err != kNoError) {
      return err;
    }
  }

  InitializeExperimentalAgc();
  InitializeTransient();
  InitializeBeamformer();
  InitializeIntelligibility();
  InitializeHighPassFilter();
  InitializeNoiseSuppression();
  InitializeLevelEstimator();
  InitializeVoiceDetection();

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Record the new configuration in the debug dump, if one is active.
  if (debug_dump_.debug_file->Open()) {
    int err = WriteInitMessage();
    if (err != kNoError) {
      return err;
    }
  }
#endif

  return kNoError;
}
410
// Validates |config|, derives the internal forward/reverse processing
// formats from it, and then performs the full reinitialization. Must be
// called with both locks held.
int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
  // Any stream that carries channels must have a positive sample rate.
  for (const auto& stream : config.streams) {
    if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) {
      return kBadSampleRateError;
    }
  }

  const size_t num_in_channels = config.input_stream().num_channels();
  const size_t num_out_channels = config.output_stream().num_channels();

  // Need at least one input channel.
  // Need either one output channel or as many outputs as there are inputs.
  if (num_in_channels == 0 ||
      !(num_out_channels == 1 || num_out_channels == num_in_channels)) {
    return kBadNumberChannelsError;
  }

  // Beamforming requires one input channel per configured mic position.
  if (capture_nonlocked_.beamformer_enabled &&
      num_in_channels != capture_.array_geometry.size()) {
    return kBadNumberChannelsError;
  }

  formats_.api_format = config;

  // We process at the closest native rate >= min(input rate, output rate)...
  const int min_proc_rate =
      std::min(formats_.api_format.input_stream().sample_rate_hz(),
               formats_.api_format.output_stream().sample_rate_hz());
  int fwd_proc_rate;
  // kNativeSampleRatesHz is ascending, so this picks the smallest native
  // rate >= min_proc_rate (or the largest native rate if none is).
  for (size_t i = 0; i < kNumNativeSampleRates; ++i) {
    fwd_proc_rate = kNativeSampleRatesHz[i];
    if (fwd_proc_rate >= min_proc_rate) {
      break;
    }
  }
  // ...with one exception: AECM caps the processing rate.
  if (public_submodules_->echo_control_mobile->is_enabled() &&
      min_proc_rate > kMaxAECMSampleRateHz) {
    fwd_proc_rate = kMaxAECMSampleRateHz;
  }

  capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);

  // We normally process the reverse stream at 16 kHz. Unless...
  int rev_proc_rate = kSampleRate16kHz;
  if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) {
    // ...the forward stream is at 8 kHz.
    rev_proc_rate = kSampleRate8kHz;
  } else {
    if (formats_.api_format.reverse_input_stream().sample_rate_hz() ==
        kSampleRate32kHz) {
      // ...or the input is at 32 kHz, in which case we use the splitting
      // filter rather than the resampler.
      rev_proc_rate = kSampleRate32kHz;
    }
  }

  // Always downmix the reverse stream to mono for analysis. This has been
  // demonstrated to work well for AEC in most practical scenarios.
  formats_.rev_proc_format = StreamConfig(rev_proc_rate, 1);

  // At 32/48 kHz the band-split filter runs the submodules on 16 kHz bands;
  // at 8/16 kHz processing happens at the full rate.
  if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate32kHz ||
      capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate48kHz) {
    capture_nonlocked_.split_rate = kSampleRate16kHz;
  } else {
    capture_nonlocked_.split_rate =
        capture_nonlocked_.fwd_proc_format.sample_rate_hz();
  }

  return InitializeLocked();
}
482
// Propagates config options to all components and toggles the optional
// transient-suppressor / beamformer features when their flags change.
void AudioProcessingImpl::SetExtraOptions(const Config& config) {
  // Run in a single-threaded manner when setting the extra options.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  for (auto item : private_submodules_->component_list) {
    item->SetExtraOptions(config);
  }

  // Reinitialize transient suppression only on an actual state change.
  if (capture_.transient_suppressor_enabled !=
      config.Get<ExperimentalNs>().enabled) {
    capture_.transient_suppressor_enabled =
        config.Get<ExperimentalNs>().enabled;
    InitializeTransient();
  }

// Runtime beamformer reconfiguration is only supported in the Android
// platform build.
#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
  if (capture_nonlocked_.beamformer_enabled !=
      config.Get<Beamforming>().enabled) {
    capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled;
    // Only adopt a new geometry if it describes an actual array (>1 mic).
    if (config.Get<Beamforming>().array_geometry.size() > 1) {
      capture_.array_geometry = config.Get<Beamforming>().array_geometry;
    }
    capture_.target_direction = config.Get<Beamforming>().target_direction;
    InitializeBeamformer();
  }
#endif  // WEBRTC_ANDROID_PLATFORM_BUILD
}
510
input_sample_rate_hz() const511 int AudioProcessingImpl::input_sample_rate_hz() const {
512 // Accessed from outside APM, hence a lock is needed.
513 rtc::CritScope cs(&crit_capture_);
514 return formats_.api_format.input_stream().sample_rate_hz();
515 }
516
proc_sample_rate_hz() const517 int AudioProcessingImpl::proc_sample_rate_hz() const {
518 // Used as callback from submodules, hence locking is not allowed.
519 return capture_nonlocked_.fwd_proc_format.sample_rate_hz();
520 }
521
proc_split_sample_rate_hz() const522 int AudioProcessingImpl::proc_split_sample_rate_hz() const {
523 // Used as callback from submodules, hence locking is not allowed.
524 return capture_nonlocked_.split_rate;
525 }
526
num_reverse_channels() const527 size_t AudioProcessingImpl::num_reverse_channels() const {
528 // Used as callback from submodules, hence locking is not allowed.
529 return formats_.rev_proc_format.num_channels();
530 }
531
num_input_channels() const532 size_t AudioProcessingImpl::num_input_channels() const {
533 // Used as callback from submodules, hence locking is not allowed.
534 return formats_.api_format.input_stream().num_channels();
535 }
536
num_proc_channels() const537 size_t AudioProcessingImpl::num_proc_channels() const {
538 // Used as callback from submodules, hence locking is not allowed.
539 return capture_nonlocked_.beamformer_enabled ? 1 : num_output_channels();
540 }
541
num_output_channels() const542 size_t AudioProcessingImpl::num_output_channels() const {
543 // Used as callback from submodules, hence locking is not allowed.
544 return formats_.api_format.output_stream().num_channels();
545 }
546
// Records whether the capture output will be muted downstream and, when the
// new AGC manager exists, forwards the muted state to it.
void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
  rtc::CritScope cs(&crit_capture_);
  capture_.output_will_be_muted = muted;
  // agc_manager is only created when the experimental AGC is in use.
  if (private_submodules_->agc_manager.get()) {
    private_submodules_->agc_manager->SetCaptureMuted(
        capture_.output_will_be_muted);
  }
}
555
556
// Legacy float entry point: converts (rate, layout) arguments into
// StreamConfigs and delegates to the StreamConfig overload.
int AudioProcessingImpl::ProcessStream(const float* const* src,
                                       size_t samples_per_channel,
                                       int input_sample_rate_hz,
                                       ChannelLayout input_layout,
                                       int output_sample_rate_hz,
                                       ChannelLayout output_layout,
                                       float* const* dest) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_ChannelLayout");
  StreamConfig input_stream;
  StreamConfig output_stream;
  {
    // Access the formats_.api_format.input_stream beneath the capture lock.
    // The lock must be released as it is later required in the call
    // to ProcessStream(,,,);
    rtc::CritScope cs(&crit_capture_);
    input_stream = formats_.api_format.input_stream();
    output_stream = formats_.api_format.output_stream();
  }

  // Overwrite the copied configs with the caller-supplied parameters.
  input_stream.set_sample_rate_hz(input_sample_rate_hz);
  input_stream.set_num_channels(ChannelsFromLayout(input_layout));
  input_stream.set_has_keyboard(LayoutHasKeyboard(input_layout));
  output_stream.set_sample_rate_hz(output_sample_rate_hz);
  output_stream.set_num_channels(ChannelsFromLayout(output_layout));
  output_stream.set_has_keyboard(LayoutHasKeyboard(output_layout));

  // The caller's buffer length must match exactly one 10 ms chunk at the
  // given rate.
  if (samples_per_channel != input_stream.num_frames()) {
    return kBadDataLengthError;
  }
  return ProcessStream(src, input_stream, output_stream, dest);
}
588
// Float capture-side entry point: drains queued render data, reinitializes
// if the stream formats changed, then runs the capture processing chain on
// |src| into |dest|.
int AudioProcessingImpl::ProcessStream(const float* const* src,
                                       const StreamConfig& input_config,
                                       const StreamConfig& output_config,
                                       float* const* dest) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
  ProcessingConfig processing_config;
  {
    // Acquire the capture lock in order to safely call the function
    // that retrieves the render side data. This function accesses apm
    // getters that need the capture lock held when being called.
    rtc::CritScope cs_capture(&crit_capture_);
    public_submodules_->echo_cancellation->ReadQueuedRenderData();
    public_submodules_->echo_control_mobile->ReadQueuedRenderData();
    public_submodules_->gain_control->ReadQueuedRenderData();

    if (!src || !dest) {
      return kNullPointerError;
    }

    // Snapshot the current format; mutated below without the lock held.
    processing_config = formats_.api_format;
  }

  processing_config.input_stream() = input_config;
  processing_config.output_stream() = output_config;

  {
    // Do conditional reinitialization.
    rtc::CritScope cs_render(&crit_render_);
    RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
  }
  // Held for the remainder of the function (actual processing).
  rtc::CritScope cs_capture(&crit_capture_);
  assert(processing_config.input_stream().num_frames() ==
         formats_.api_format.input_stream().num_frames());

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Log config and per-channel input samples before processing.
  if (debug_dump_.debug_file->Open()) {
    RETURN_ON_ERR(WriteConfigMessage(false));

    debug_dump_.capture.event_msg->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t channel_size =
        sizeof(float) * formats_.api_format.input_stream().num_frames();
    for (size_t i = 0; i < formats_.api_format.input_stream().num_channels();
         ++i)
      msg->add_input_channel(src[i], channel_size);
  }
#endif

  capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
  RETURN_ON_ERR(ProcessStreamLocked());
  capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Log the processed output and flush the event to the dump file.
  if (debug_dump_.debug_file->Open()) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t channel_size =
        sizeof(float) * formats_.api_format.output_stream().num_frames();
    for (size_t i = 0; i < formats_.api_format.output_stream().num_channels();
         ++i)
      msg->add_output_channel(dest[i], channel_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.capture));
  }
#endif

  return kNoError;
}
656
// Int16 (AudioFrame) capture-side entry point. Processes |frame| in place.
// Input and output rates/channels are constrained to be identical in this
// interface.
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
  TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
  {
    // Acquire the capture lock in order to safely call the function
    // that retrieves the render side data. This function accesses apm
    // getters that need the capture lock held when being called.
    // The lock needs to be released as
    // public_submodules_->echo_control_mobile->is_enabled() aquires this lock
    // as well.
    rtc::CritScope cs_capture(&crit_capture_);
    public_submodules_->echo_cancellation->ReadQueuedRenderData();
    public_submodules_->echo_control_mobile->ReadQueuedRenderData();
    public_submodules_->gain_control->ReadQueuedRenderData();
  }

  if (!frame) {
    return kNullPointerError;
  }
  // Must be a native rate.
  if (frame->sample_rate_hz_ != kSampleRate8kHz &&
      frame->sample_rate_hz_ != kSampleRate16kHz &&
      frame->sample_rate_hz_ != kSampleRate32kHz &&
      frame->sample_rate_hz_ != kSampleRate48kHz) {
    return kBadSampleRateError;
  }

  if (public_submodules_->echo_control_mobile->is_enabled() &&
      frame->sample_rate_hz_ > kMaxAECMSampleRateHz) {
    LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
    return kUnsupportedComponentError;
  }

  ProcessingConfig processing_config;
  {
    // Aquire lock for the access of api_format.
    // The lock is released immediately due to the conditional
    // reinitialization.
    rtc::CritScope cs_capture(&crit_capture_);
    // TODO(ajm): The input and output rates and channels are currently
    // constrained to be identical in the int16 interface.
    processing_config = formats_.api_format;
  }
  processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_);
  processing_config.input_stream().set_num_channels(frame->num_channels_);
  processing_config.output_stream().set_sample_rate_hz(frame->sample_rate_hz_);
  processing_config.output_stream().set_num_channels(frame->num_channels_);

  {
    // Do conditional reinitialization.
    rtc::CritScope cs_render(&crit_render_);
    RETURN_ON_ERR(MaybeInitializeCapture(processing_config));
  }
  // Held for the remainder of the function (actual processing).
  rtc::CritScope cs_capture(&crit_capture_);
  if (frame->samples_per_channel_ !=
      formats_.api_format.input_stream().num_frames()) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Log the interleaved int16 input data before processing.
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.capture.event_msg->set_type(audioproc::Event::STREAM);
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
    msg->set_input_data(frame->data_, data_size);
  }
#endif

  capture_.capture_audio->DeinterleaveFrom(frame);
  RETURN_ON_ERR(ProcessStreamLocked());
  // Only write the processed data back when processing actually changed it.
  capture_.capture_audio->InterleaveTo(frame,
                                       output_copy_needed(is_data_processed()));

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Log the processed output and flush the event to the dump file.
  if (debug_dump_.debug_file->Open()) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
    msg->set_output_data(frame->data_, data_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.capture));
  }
#endif

  return kNoError;
}
743
// Runs the full capture-side processing chain on capture_.capture_audio.
// Must be called with crit_capture_ held. The submodule ordering below is
// deliberate and should not be rearranged casually.
int AudioProcessingImpl::ProcessStreamLocked() {
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Record per-frame metadata (delay, drift, analog level, keypress).
  if (debug_dump_.debug_file->Open()) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    msg->set_delay(capture_nonlocked_.stream_delay_ms);
    msg->set_drift(
        public_submodules_->echo_cancellation->stream_drift_samples());
    msg->set_level(gain_control()->stream_analog_level());
    msg->set_keypress(capture_.key_pressed);
  }
#endif

  MaybeUpdateHistograms();

  AudioBuffer* ca = capture_.capture_audio.get();  // For brevity.

  // New-AGC pre-analysis runs on the full-band signal, before any
  // processing has modified it.
  if (constants_.use_new_agc &&
      public_submodules_->gain_control->is_enabled()) {
    private_submodules_->agc_manager->AnalyzePreProcess(
        ca->channels()[0], ca->num_channels(),
        capture_nonlocked_.fwd_proc_format.num_frames());
  }

  // Split into frequency bands when band-based processing will run.
  bool data_processed = is_data_processed();
  if (analysis_needed(data_processed)) {
    ca->SplitIntoFrequencyBands();
  }

  if (constants_.intelligibility_enabled) {
    public_submodules_->intelligibility_enhancer->AnalyzeCaptureAudio(
        ca->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,
        ca->num_channels());
  }

  // The beamformer mixes the split-band data down to a single channel;
  // everything after this point sees one channel.
  if (capture_nonlocked_.beamformer_enabled) {
    private_submodules_->beamformer->ProcessChunk(*ca->split_data_f(),
                                                  ca->split_data_f());
    ca->set_num_channels(1);
  }

  public_submodules_->high_pass_filter->ProcessCaptureAudio(ca);
  RETURN_ON_ERR(public_submodules_->gain_control->AnalyzeCaptureAudio(ca));
  public_submodules_->noise_suppression->AnalyzeCaptureAudio(ca);
  RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessCaptureAudio(ca));

  // AECM uses the pre-NS low-band signal as its reference, so snapshot it
  // before noise suppression modifies the data.
  if (public_submodules_->echo_control_mobile->is_enabled() &&
      public_submodules_->noise_suppression->is_enabled()) {
    ca->CopyLowPassToReference();
  }
  public_submodules_->noise_suppression->ProcessCaptureAudio(ca);
  RETURN_ON_ERR(
      public_submodules_->echo_control_mobile->ProcessCaptureAudio(ca));
  public_submodules_->voice_detection->ProcessCaptureAudio(ca);

  // Drive the new AGC, but not while the beamformer reports no target
  // speaker (avoids adapting gain on non-target audio).
  if (constants_.use_new_agc &&
      public_submodules_->gain_control->is_enabled() &&
      (!capture_nonlocked_.beamformer_enabled ||
       private_submodules_->beamformer->is_target_present())) {
    private_submodules_->agc_manager->Process(
        ca->split_bands_const(0)[kBand0To8kHz], ca->num_frames_per_band(),
        capture_nonlocked_.split_rate);
  }
  RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(ca));

  // Recombine the frequency bands before full-band post-processing.
  if (synthesis_needed(data_processed)) {
    ca->MergeFrequencyBands();
  }

  // TODO(aluebs): Investigate if the transient suppression placement should be
  // before or after the AGC.
  if (capture_.transient_suppressor_enabled) {
    // Without the new AGC manager there is no voice-probability estimate;
    // fall back to 1.0 (always treat as voiced).
    float voice_probability =
        private_submodules_->agc_manager.get()
            ? private_submodules_->agc_manager->voice_probability()
            : 1.f;

    public_submodules_->transient_suppressor->Suppress(
        ca->channels_f()[0], ca->num_frames(), ca->num_channels(),
        ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(),
        ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability,
        capture_.key_pressed);
  }

  // The level estimator operates on the recombined data.
  public_submodules_->level_estimator->ProcessStream(ca);

  // Require a fresh set_stream_delay_ms() before the next ProcessStream().
  capture_.was_stream_delay_set = false;
  return kNoError;
}
833
AnalyzeReverseStream(const float * const * data,size_t samples_per_channel,int rev_sample_rate_hz,ChannelLayout layout)834 int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
835 size_t samples_per_channel,
836 int rev_sample_rate_hz,
837 ChannelLayout layout) {
838 TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_ChannelLayout");
839 rtc::CritScope cs(&crit_render_);
840 const StreamConfig reverse_config = {
841 rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout),
842 };
843 if (samples_per_channel != reverse_config.num_frames()) {
844 return kBadDataLengthError;
845 }
846 return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config);
847 }
848
ProcessReverseStream(const float * const * src,const StreamConfig & reverse_input_config,const StreamConfig & reverse_output_config,float * const * dest)849 int AudioProcessingImpl::ProcessReverseStream(
850 const float* const* src,
851 const StreamConfig& reverse_input_config,
852 const StreamConfig& reverse_output_config,
853 float* const* dest) {
854 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
855 rtc::CritScope cs(&crit_render_);
856 RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, reverse_input_config,
857 reverse_output_config));
858 if (is_rev_processed()) {
859 render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
860 dest);
861 } else if (render_check_rev_conversion_needed()) {
862 render_.render_converter->Convert(src, reverse_input_config.num_samples(),
863 dest,
864 reverse_output_config.num_samples());
865 } else {
866 CopyAudioIfNeeded(src, reverse_input_config.num_frames(),
867 reverse_input_config.num_channels(), dest);
868 }
869
870 return kNoError;
871 }
872
// Validates the render input, adopts the requested reverse formats
// (reinitializing if they changed), optionally dumps the raw render audio,
// and runs the render-side processing chain. Caller must hold crit_render_.
int AudioProcessingImpl::AnalyzeReverseStreamLocked(
    const float* const* src,
    const StreamConfig& reverse_input_config,
    const StreamConfig& reverse_output_config) {
  if (src == nullptr) {
    return kNullPointerError;
  }

  if (reverse_input_config.num_channels() == 0) {
    return kBadNumberChannelsError;
  }

  // Merge the requested reverse formats into the current API format.
  ProcessingConfig processing_config = formats_.api_format;
  processing_config.reverse_input_stream() = reverse_input_config;
  processing_config.reverse_output_stream() = reverse_output_config;

  RETURN_ON_ERR(MaybeInitializeRender(processing_config));
  assert(reverse_input_config.num_frames() ==
         formats_.api_format.reverse_input_stream().num_frames());

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Dump the raw deinterleaved float render audio, one channel per field.
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.render.event_msg->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg =
        debug_dump_.render.event_msg->mutable_reverse_stream();
    const size_t channel_size =
        sizeof(float) * formats_.api_format.reverse_input_stream().num_frames();
    for (size_t i = 0;
         i < formats_.api_format.reverse_input_stream().num_channels(); ++i)
      msg->add_channel(src[i], channel_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.render));
  }
#endif

  render_.render_audio->CopyFrom(src,
                                 formats_.api_format.reverse_input_stream());
  return ProcessReverseStreamLocked();
}
912
ProcessReverseStream(AudioFrame * frame)913 int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
914 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
915 RETURN_ON_ERR(AnalyzeReverseStream(frame));
916 rtc::CritScope cs(&crit_render_);
917 if (is_rev_processed()) {
918 render_.render_audio->InterleaveTo(frame, true);
919 }
920
921 return kNoError;
922 }
923
// Validates an AudioFrame of render audio, mirrors its format into both
// reverse streams, optionally dumps the raw interleaved data, and runs the
// render-side processing chain.
int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
  TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_AudioFrame");
  rtc::CritScope cs(&crit_render_);
  if (frame == nullptr) {
    return kNullPointerError;
  }
  // Must be a native rate.
  if (frame->sample_rate_hz_ != kSampleRate8kHz &&
      frame->sample_rate_hz_ != kSampleRate16kHz &&
      frame->sample_rate_hz_ != kSampleRate32kHz &&
      frame->sample_rate_hz_ != kSampleRate48kHz) {
    return kBadSampleRateError;
  }
  // This interface does not tolerate different forward and reverse rates.
  if (frame->sample_rate_hz_ !=
      formats_.api_format.input_stream().sample_rate_hz()) {
    return kBadSampleRateError;
  }

  if (frame->num_channels_ <= 0) {
    return kBadNumberChannelsError;
  }

  // This interface uses identical reverse input and output formats, so the
  // frame's format is applied to both streams.
  ProcessingConfig processing_config = formats_.api_format;
  processing_config.reverse_input_stream().set_sample_rate_hz(
      frame->sample_rate_hz_);
  processing_config.reverse_input_stream().set_num_channels(
      frame->num_channels_);
  processing_config.reverse_output_stream().set_sample_rate_hz(
      frame->sample_rate_hz_);
  processing_config.reverse_output_stream().set_num_channels(
      frame->num_channels_);

  RETURN_ON_ERR(MaybeInitializeRender(processing_config));
  if (frame->samples_per_channel_ !=
      formats_.api_format.reverse_input_stream().num_frames()) {
    return kBadDataLengthError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Dump the raw interleaved int16 render data, if a debug file is active.
  if (debug_dump_.debug_file->Open()) {
    debug_dump_.render.event_msg->set_type(audioproc::Event::REVERSE_STREAM);
    audioproc::ReverseStream* msg =
        debug_dump_.render.event_msg->mutable_reverse_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
    msg->set_data(frame->data_, data_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &crit_debug_, &debug_dump_.render));
  }
#endif
  render_.render_audio->DeinterleaveFrom(frame);
  return ProcessReverseStreamLocked();
}
978
// Runs the render-side processing chain on the render buffer. Caller must
// hold crit_render_. Ordering of the submodule calls is significant.
int AudioProcessingImpl::ProcessReverseStreamLocked() {
  AudioBuffer* ra = render_.render_audio.get();  // For brevity.
  // Note: unlike the capture path (which also splits at 48 kHz), the render
  // path splits bands only at 32 kHz.
  if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz) {
    ra->SplitIntoFrequencyBands();
  }

  if (constants_.intelligibility_enabled) {
    // Currently run in single-threaded mode when the intelligibility
    // enhancer is activated.
    // TODO(peah): Fix to be properly multi-threaded.
    rtc::CritScope cs(&crit_capture_);
    public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
        ra->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,
        ra->num_channels());
  }

  RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessRenderAudio(ra));
  RETURN_ON_ERR(
      public_submodules_->echo_control_mobile->ProcessRenderAudio(ra));
  // With the experimental AGC the render side of gain control is not used.
  if (!constants_.use_new_agc) {
    RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra));
  }

  // Recombine bands only if they were split and the render audio was
  // actually modified (i.e. the intelligibility enhancer is active).
  if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz &&
      is_rev_processed()) {
    ra->MergeFrequencyBands();
  }

  return kNoError;
}
1009
set_stream_delay_ms(int delay)1010 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
1011 rtc::CritScope cs(&crit_capture_);
1012 Error retval = kNoError;
1013 capture_.was_stream_delay_set = true;
1014 delay += capture_.delay_offset_ms;
1015
1016 if (delay < 0) {
1017 delay = 0;
1018 retval = kBadStreamParameterWarning;
1019 }
1020
1021 // TODO(ajm): the max is rather arbitrarily chosen; investigate.
1022 if (delay > 500) {
1023 delay = 500;
1024 retval = kBadStreamParameterWarning;
1025 }
1026
1027 capture_nonlocked_.stream_delay_ms = delay;
1028 return retval;
1029 }
1030
stream_delay_ms() const1031 int AudioProcessingImpl::stream_delay_ms() const {
1032 // Used as callback from submodules, hence locking is not allowed.
1033 return capture_nonlocked_.stream_delay_ms;
1034 }
1035
was_stream_delay_set() const1036 bool AudioProcessingImpl::was_stream_delay_set() const {
1037 // Used as callback from submodules, hence locking is not allowed.
1038 return capture_.was_stream_delay_set;
1039 }
1040
set_stream_key_pressed(bool key_pressed)1041 void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
1042 rtc::CritScope cs(&crit_capture_);
1043 capture_.key_pressed = key_pressed;
1044 }
1045
set_delay_offset_ms(int offset)1046 void AudioProcessingImpl::set_delay_offset_ms(int offset) {
1047 rtc::CritScope cs(&crit_capture_);
1048 capture_.delay_offset_ms = offset;
1049 }
1050
delay_offset_ms() const1051 int AudioProcessingImpl::delay_offset_ms() const {
1052 rtc::CritScope cs(&crit_capture_);
1053 return capture_.delay_offset_ms;
1054 }
1055
// Starts recording the debug dump to the file at |filename|, replacing any
// recording already in progress. Returns kUnsupportedFunctionError when the
// build lacks WEBRTC_AUDIOPROC_DEBUG_DUMP.
int AudioProcessingImpl::StartDebugRecording(
    const char filename[AudioProcessing::kMaxFilenameSize]) {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  static_assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize, "");

  if (filename == nullptr) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_dump_.debug_file->Open()) {
    if (debug_dump_.debug_file->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_dump_.debug_file->OpenFile(filename, false) == -1) {
    debug_dump_.debug_file->CloseFile();
    return kFileError;
  }

  // Every recording starts with a forced config message and an init message
  // so the dump is self-describing.
  RETURN_ON_ERR(WriteConfigMessage(true));
  RETURN_ON_ERR(WriteInitMessage());
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}
1087
// Starts recording the debug dump to an already-open FILE handle, replacing
// any recording in progress. The FileWrapper takes over writing to |handle|.
int AudioProcessingImpl::StartDebugRecording(FILE* handle) {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);

  if (handle == nullptr) {
    return kNullPointerError;
  }

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // Stop any ongoing recording.
  if (debug_dump_.debug_file->Open()) {
    if (debug_dump_.debug_file->CloseFile() == -1) {
      return kFileError;
    }
  }

  if (debug_dump_.debug_file->OpenFromFileHandle(handle, true, false) == -1) {
    return kFileError;
  }

  // Every recording starts with a forced config message and an init message
  // so the dump is self-describing.
  RETURN_ON_ERR(WriteConfigMessage(true));
  RETURN_ON_ERR(WriteInitMessage());
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}
1116
// Adapts a platform file handle to a FILE* and starts debug recording on it.
int AudioProcessingImpl::StartDebugRecordingForPlatformFile(
    rtc::PlatformFile handle) {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);
  // NOTE(review): if FdopenPlatformFileForWriting fails, |stream| is null and
  // StartDebugRecording reports kNullPointerError; it looks like the
  // underlying platform handle is not closed in that case — confirm ownership.
  FILE* stream = rtc::FdopenPlatformFileForWriting(handle);
  return StartDebugRecording(stream);
}
1125
// Stops an active debug recording by closing the dump file; a no-op when no
// recording has been started.
int AudioProcessingImpl::StopDebugRecording() {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);

#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
  // We just return if recording hasn't started.
  if (debug_dump_.debug_file->Open()) {
    if (debug_dump_.debug_file->CloseFile() == -1) {
      return kFileError;
    }
  }
  return kNoError;
#else
  return kUnsupportedFunctionError;
#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
}
1143
echo_cancellation() const1144 EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
1145 // Adding a lock here has no effect as it allows any access to the submodule
1146 // from the returned pointer.
1147 return public_submodules_->echo_cancellation;
1148 }
1149
echo_control_mobile() const1150 EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
1151 // Adding a lock here has no effect as it allows any access to the submodule
1152 // from the returned pointer.
1153 return public_submodules_->echo_control_mobile;
1154 }
1155
gain_control() const1156 GainControl* AudioProcessingImpl::gain_control() const {
1157 // Adding a lock here has no effect as it allows any access to the submodule
1158 // from the returned pointer.
1159 if (constants_.use_new_agc) {
1160 return public_submodules_->gain_control_for_new_agc.get();
1161 }
1162 return public_submodules_->gain_control;
1163 }
1164
high_pass_filter() const1165 HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
1166 // Adding a lock here has no effect as it allows any access to the submodule
1167 // from the returned pointer.
1168 return public_submodules_->high_pass_filter.get();
1169 }
1170
level_estimator() const1171 LevelEstimator* AudioProcessingImpl::level_estimator() const {
1172 // Adding a lock here has no effect as it allows any access to the submodule
1173 // from the returned pointer.
1174 return public_submodules_->level_estimator.get();
1175 }
1176
noise_suppression() const1177 NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
1178 // Adding a lock here has no effect as it allows any access to the submodule
1179 // from the returned pointer.
1180 return public_submodules_->noise_suppression.get();
1181 }
1182
voice_detection() const1183 VoiceDetection* AudioProcessingImpl::voice_detection() const {
1184 // Adding a lock here has no effect as it allows any access to the submodule
1185 // from the returned pointer.
1186 return public_submodules_->voice_detection.get();
1187 }
1188
is_data_processed() const1189 bool AudioProcessingImpl::is_data_processed() const {
1190 if (capture_nonlocked_.beamformer_enabled) {
1191 return true;
1192 }
1193
1194 int enabled_count = 0;
1195 for (auto item : private_submodules_->component_list) {
1196 if (item->is_component_enabled()) {
1197 enabled_count++;
1198 }
1199 }
1200 if (public_submodules_->high_pass_filter->is_enabled()) {
1201 enabled_count++;
1202 }
1203 if (public_submodules_->noise_suppression->is_enabled()) {
1204 enabled_count++;
1205 }
1206 if (public_submodules_->level_estimator->is_enabled()) {
1207 enabled_count++;
1208 }
1209 if (public_submodules_->voice_detection->is_enabled()) {
1210 enabled_count++;
1211 }
1212
1213 // Data is unchanged if no components are enabled, or if only
1214 // public_submodules_->level_estimator
1215 // or public_submodules_->voice_detection is enabled.
1216 if (enabled_count == 0) {
1217 return false;
1218 } else if (enabled_count == 1) {
1219 if (public_submodules_->level_estimator->is_enabled() ||
1220 public_submodules_->voice_detection->is_enabled()) {
1221 return false;
1222 }
1223 } else if (enabled_count == 2) {
1224 if (public_submodules_->level_estimator->is_enabled() &&
1225 public_submodules_->voice_detection->is_enabled()) {
1226 return false;
1227 }
1228 }
1229 return true;
1230 }
1231
output_copy_needed(bool is_data_processed) const1232 bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
1233 // Check if we've upmixed or downmixed the audio.
1234 return ((formats_.api_format.output_stream().num_channels() !=
1235 formats_.api_format.input_stream().num_channels()) ||
1236 is_data_processed || capture_.transient_suppressor_enabled);
1237 }
1238
synthesis_needed(bool is_data_processed) const1239 bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
1240 return (is_data_processed &&
1241 (capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
1242 kSampleRate32kHz ||
1243 capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
1244 kSampleRate48kHz));
1245 }
1246
analysis_needed(bool is_data_processed) const1247 bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
1248 if (!is_data_processed &&
1249 !public_submodules_->voice_detection->is_enabled() &&
1250 !capture_.transient_suppressor_enabled) {
1251 // Only public_submodules_->level_estimator is enabled.
1252 return false;
1253 } else if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
1254 kSampleRate32kHz ||
1255 capture_nonlocked_.fwd_proc_format.sample_rate_hz() ==
1256 kSampleRate48kHz) {
1257 // Something besides public_submodules_->level_estimator is enabled, and we
1258 // have super-wb.
1259 return true;
1260 }
1261 return false;
1262 }
1263
is_rev_processed() const1264 bool AudioProcessingImpl::is_rev_processed() const {
1265 return constants_.intelligibility_enabled &&
1266 public_submodules_->intelligibility_enhancer->active();
1267 }
1268
render_check_rev_conversion_needed() const1269 bool AudioProcessingImpl::render_check_rev_conversion_needed() const {
1270 return rev_conversion_needed();
1271 }
1272
rev_conversion_needed() const1273 bool AudioProcessingImpl::rev_conversion_needed() const {
1274 return (formats_.api_format.reverse_input_stream() !=
1275 formats_.api_format.reverse_output_stream());
1276 }
1277
InitializeExperimentalAgc()1278 void AudioProcessingImpl::InitializeExperimentalAgc() {
1279 if (constants_.use_new_agc) {
1280 if (!private_submodules_->agc_manager.get()) {
1281 private_submodules_->agc_manager.reset(new AgcManagerDirect(
1282 public_submodules_->gain_control,
1283 public_submodules_->gain_control_for_new_agc.get(),
1284 constants_.agc_startup_min_volume));
1285 }
1286 private_submodules_->agc_manager->Initialize();
1287 private_submodules_->agc_manager->SetCaptureMuted(
1288 capture_.output_will_be_muted);
1289 }
1290 }
1291
InitializeTransient()1292 void AudioProcessingImpl::InitializeTransient() {
1293 if (capture_.transient_suppressor_enabled) {
1294 if (!public_submodules_->transient_suppressor.get()) {
1295 public_submodules_->transient_suppressor.reset(new TransientSuppressor());
1296 }
1297 public_submodules_->transient_suppressor->Initialize(
1298 capture_nonlocked_.fwd_proc_format.sample_rate_hz(),
1299 capture_nonlocked_.split_rate,
1300 num_proc_channels());
1301 }
1302 }
1303
InitializeBeamformer()1304 void AudioProcessingImpl::InitializeBeamformer() {
1305 if (capture_nonlocked_.beamformer_enabled) {
1306 if (!private_submodules_->beamformer) {
1307 private_submodules_->beamformer.reset(new NonlinearBeamformer(
1308 capture_.array_geometry, capture_.target_direction));
1309 }
1310 private_submodules_->beamformer->Initialize(kChunkSizeMs,
1311 capture_nonlocked_.split_rate);
1312 }
1313 }
1314
InitializeIntelligibility()1315 void AudioProcessingImpl::InitializeIntelligibility() {
1316 if (constants_.intelligibility_enabled) {
1317 IntelligibilityEnhancer::Config config;
1318 config.sample_rate_hz = capture_nonlocked_.split_rate;
1319 config.num_capture_channels = capture_.capture_audio->num_channels();
1320 config.num_render_channels = render_.render_audio->num_channels();
1321 public_submodules_->intelligibility_enhancer.reset(
1322 new IntelligibilityEnhancer(config));
1323 }
1324 }
1325
InitializeHighPassFilter()1326 void AudioProcessingImpl::InitializeHighPassFilter() {
1327 public_submodules_->high_pass_filter->Initialize(num_proc_channels(),
1328 proc_sample_rate_hz());
1329 }
1330
InitializeNoiseSuppression()1331 void AudioProcessingImpl::InitializeNoiseSuppression() {
1332 public_submodules_->noise_suppression->Initialize(num_proc_channels(),
1333 proc_sample_rate_hz());
1334 }
1335
InitializeLevelEstimator()1336 void AudioProcessingImpl::InitializeLevelEstimator() {
1337 public_submodules_->level_estimator->Initialize();
1338 }
1339
InitializeVoiceDetection()1340 void AudioProcessingImpl::InitializeVoiceDetection() {
1341 public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
1342 }
1343
// Detects large jumps in the platform-reported stream delay and in the AEC's
// internal system delay, logging each jump to a sparse histogram and counting
// them for the end-of-call summary. Only active while AEC is enabled.
void AudioProcessingImpl::MaybeUpdateHistograms() {
  // Jumps smaller than this are considered normal jitter, not logged.
  static const int kMinDiffDelayMs = 60;

  if (echo_cancellation()->is_enabled()) {
    // Activate delay_jumps_ counters if we know echo_cancellation is running.
    // If a stream has echo we know that the echo_cancellation is in process.
    if (capture_.stream_delay_jumps == -1 &&
        echo_cancellation()->stream_has_echo()) {
      capture_.stream_delay_jumps = 0;
    }
    if (capture_.aec_system_delay_jumps == -1 &&
        echo_cancellation()->stream_has_echo()) {
      capture_.aec_system_delay_jumps = 0;
    }

    // Detect a jump in platform reported system delay and log the difference.
    const int diff_stream_delay_ms =
        capture_nonlocked_.stream_delay_ms - capture_.last_stream_delay_ms;
    if (diff_stream_delay_ms > kMinDiffDelayMs &&
        capture_.last_stream_delay_ms != 0) {
      RTC_HISTOGRAM_COUNTS_SPARSE(
          "WebRTC.Audio.PlatformReportedStreamDelayJump", diff_stream_delay_ms,
          kMinDiffDelayMs, 1000, 100);
      if (capture_.stream_delay_jumps == -1) {
        capture_.stream_delay_jumps = 0;  // Activate counter if needed.
      }
      capture_.stream_delay_jumps++;
    }
    capture_.last_stream_delay_ms = capture_nonlocked_.stream_delay_ms;

    // Detect a jump in AEC system delay and log the difference.
    const int frames_per_ms =
        rtc::CheckedDivExact(capture_nonlocked_.split_rate, 1000);
    const int aec_system_delay_ms =
        WebRtcAec_system_delay(echo_cancellation()->aec_core()) / frames_per_ms;
    const int diff_aec_system_delay_ms =
        aec_system_delay_ms - capture_.last_aec_system_delay_ms;
    if (diff_aec_system_delay_ms > kMinDiffDelayMs &&
        capture_.last_aec_system_delay_ms != 0) {
      RTC_HISTOGRAM_COUNTS_SPARSE("WebRTC.Audio.AecSystemDelayJump",
                                  diff_aec_system_delay_ms, kMinDiffDelayMs,
                                  1000, 100);
      if (capture_.aec_system_delay_jumps == -1) {
        capture_.aec_system_delay_jumps = 0;  // Activate counter if needed.
      }
      capture_.aec_system_delay_jumps++;
    }
    capture_.last_aec_system_delay_ms = aec_system_delay_ms;
  }
}
1394
// Flushes the per-call delay-jump counters accumulated by
// MaybeUpdateHistograms() to histograms and resets them for the next call.
// A counter value of -1 means it was never activated, so nothing is logged.
void AudioProcessingImpl::UpdateHistogramsOnCallEnd() {
  // Run in a single-threaded manner.
  rtc::CritScope cs_render(&crit_render_);
  rtc::CritScope cs_capture(&crit_capture_);

  if (capture_.stream_delay_jumps > -1) {
    RTC_HISTOGRAM_ENUMERATION_SPARSE(
        "WebRTC.Audio.NumOfPlatformReportedStreamDelayJumps",
        capture_.stream_delay_jumps, 51);
  }
  capture_.stream_delay_jumps = -1;
  capture_.last_stream_delay_ms = 0;

  if (capture_.aec_system_delay_jumps > -1) {
    RTC_HISTOGRAM_ENUMERATION_SPARSE("WebRTC.Audio.NumOfAecSystemDelayJumps",
                                     capture_.aec_system_delay_jumps, 51);
  }
  capture_.aec_system_delay_jumps = -1;
  capture_.last_aec_system_delay_ms = 0;
}
1415
1416 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// Serializes |debug_state|'s pending event message and appends it to
// |debug_file| as a length-prefixed record, then clears the message so the
// same state can accumulate the next event. |crit_debug| guards the write so
// records from the capture and render threads do not interleave.
int AudioProcessingImpl::WriteMessageToDebugFile(
    FileWrapper* debug_file,
    rtc::CriticalSection* crit_debug,
    ApmDebugDumpThreadState* debug_state) {
  int32_t size = debug_state->event_msg->ByteSize();
  if (size <= 0) {
    return kUnspecifiedError;
  }
#if defined(WEBRTC_ARCH_BIG_ENDIAN)
  // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
  // pretty safe in assuming little-endian.
#endif

  if (!debug_state->event_msg->SerializeToString(&debug_state->event_str)) {
    return kUnspecifiedError;
  }

  {
    // Ensure atomic writes of the message.
    rtc::CritScope cs_capture(crit_debug);
    // Write message preceded by its size.
    if (!debug_file->Write(&size, sizeof(int32_t))) {
      return kFileError;
    }
    if (!debug_file->Write(debug_state->event_str.data(),
                           debug_state->event_str.length())) {
      return kFileError;
    }
  }

  debug_state->event_msg->Clear();

  return kNoError;
}
1451
// Writes an INIT event describing the current API stream formats (rates and
// channel counts) to the debug dump.
int AudioProcessingImpl::WriteInitMessage() {
  debug_dump_.capture.event_msg->set_type(audioproc::Event::INIT);
  audioproc::Init* msg = debug_dump_.capture.event_msg->mutable_init();
  msg->set_sample_rate(formats_.api_format.input_stream().sample_rate_hz());

  msg->set_num_input_channels(static_cast<google::protobuf::int32>(
      formats_.api_format.input_stream().num_channels()));
  msg->set_num_output_channels(static_cast<google::protobuf::int32>(
      formats_.api_format.output_stream().num_channels()));
  msg->set_num_reverse_channels(static_cast<google::protobuf::int32>(
      formats_.api_format.reverse_input_stream().num_channels()));
  msg->set_reverse_sample_rate(
      formats_.api_format.reverse_input_stream().sample_rate_hz());
  msg->set_output_sample_rate(
      formats_.api_format.output_stream().sample_rate_hz());
  // TODO(ekmeyerson): Add reverse output fields to
  // debug_dump_.capture.event_msg.

  RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                        &crit_debug_, &debug_dump_.capture));
  return kNoError;
}
1474
// Snapshots the current submodule configuration into a CONFIG event and
// writes it to the debug dump. Unless |forced| is true, the write is skipped
// when the serialized config is identical to the last one written, so the
// dump only records configuration changes.
int AudioProcessingImpl::WriteConfigMessage(bool forced) {
  audioproc::Config config;

  config.set_aec_enabled(public_submodules_->echo_cancellation->is_enabled());
  config.set_aec_delay_agnostic_enabled(
      public_submodules_->echo_cancellation->is_delay_agnostic_enabled());
  config.set_aec_drift_compensation_enabled(
      public_submodules_->echo_cancellation->is_drift_compensation_enabled());
  config.set_aec_extended_filter_enabled(
      public_submodules_->echo_cancellation->is_extended_filter_enabled());
  config.set_aec_suppression_level(static_cast<int>(
      public_submodules_->echo_cancellation->suppression_level()));

  config.set_aecm_enabled(
      public_submodules_->echo_control_mobile->is_enabled());
  config.set_aecm_comfort_noise_enabled(
      public_submodules_->echo_control_mobile->is_comfort_noise_enabled());
  config.set_aecm_routing_mode(static_cast<int>(
      public_submodules_->echo_control_mobile->routing_mode()));

  config.set_agc_enabled(public_submodules_->gain_control->is_enabled());
  config.set_agc_mode(
      static_cast<int>(public_submodules_->gain_control->mode()));
  config.set_agc_limiter_enabled(
      public_submodules_->gain_control->is_limiter_enabled());
  config.set_noise_robust_agc_enabled(constants_.use_new_agc);

  config.set_hpf_enabled(public_submodules_->high_pass_filter->is_enabled());

  config.set_ns_enabled(public_submodules_->noise_suppression->is_enabled());
  config.set_ns_level(
      static_cast<int>(public_submodules_->noise_suppression->level()));

  config.set_transient_suppression_enabled(
      capture_.transient_suppressor_enabled);

  // Deduplicate: only write when the config changed (or a write is forced).
  std::string serialized_config = config.SerializeAsString();
  if (!forced &&
      debug_dump_.capture.last_serialized_config == serialized_config) {
    return kNoError;
  }

  debug_dump_.capture.last_serialized_config = serialized_config;

  debug_dump_.capture.event_msg->set_type(audioproc::Event::CONFIG);
  debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config);

  RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                        &crit_debug_, &debug_dump_.capture));
  return kNoError;
}
1526 #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
1527
1528 } // namespace webrtc
1529