1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/audio_processing_impl.h"
12
13 #include <algorithm>
14 #include <cstdint>
15 #include <memory>
16 #include <string>
17 #include <type_traits>
18 #include <utility>
19
20 #include "absl/types/optional.h"
21 #include "api/array_view.h"
22 #include "api/audio/audio_frame.h"
23 #include "common_audio/audio_converter.h"
24 #include "common_audio/include/audio_util.h"
25 #include "modules/audio_processing/aec_dump/aec_dump_factory.h"
26 #include "modules/audio_processing/agc2/gain_applier.h"
27 #include "modules/audio_processing/audio_buffer.h"
28 #include "modules/audio_processing/common.h"
29 #include "modules/audio_processing/include/audio_frame_view.h"
30 #include "modules/audio_processing/logging/apm_data_dumper.h"
31 #include "modules/audio_processing/optionally_built_submodule_creators.h"
32 #include "rtc_base/atomic_ops.h"
33 #include "rtc_base/checks.h"
34 #include "rtc_base/constructor_magic.h"
35 #include "rtc_base/logging.h"
36 #include "rtc_base/ref_counted_object.h"
37 #include "rtc_base/time_utils.h"
38 #include "rtc_base/trace_event.h"
39 #include "system_wrappers/include/field_trial.h"
40 #include "system_wrappers/include/metrics.h"
41
// Evaluates `expr` exactly once and, when the result differs from kNoError,
// returns that result from the enclosing function. Otherwise execution
// continues after the macro.
#define RETURN_ON_ERR(expr)   \
  do {                        \
    const int err = (expr);   \
    if (err != kNoError) {    \
      return err;             \
    }                         \
  } while (0)
49
50 namespace webrtc {
51
// Size passed to both the capture and the render runtime-setting queues when
// an AudioProcessingImpl is constructed.
constexpr int kRuntimeSettingQueueSize{100};
53
54 namespace {
55
LayoutHasKeyboard(AudioProcessing::ChannelLayout layout)56 static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) {
57 switch (layout) {
58 case AudioProcessing::kMono:
59 case AudioProcessing::kStereo:
60 return false;
61 case AudioProcessing::kMonoAndKeyboard:
62 case AudioProcessing::kStereoAndKeyboard:
63 return true;
64 }
65
66 RTC_NOTREACHED();
67 return false;
68 }
69
SampleRateSupportsMultiBand(int sample_rate_hz)70 bool SampleRateSupportsMultiBand(int sample_rate_hz) {
71 return sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
72 sample_rate_hz == AudioProcessing::kSampleRate48kHz;
73 }
74
75 // Checks whether the high-pass filter should be done in the full-band.
EnforceSplitBandHpf()76 bool EnforceSplitBandHpf() {
77 return field_trial::IsEnabled("WebRTC-FullBandHpfKillSwitch");
78 }
79
80 // Checks whether AEC3 should be allowed to decide what the default
81 // configuration should be based on the render and capture channel configuration
82 // at hand.
UseSetupSpecificDefaultAec3Congfig()83 bool UseSetupSpecificDefaultAec3Congfig() {
84 return !field_trial::IsEnabled(
85 "WebRTC-Aec3SetupSpecificDefaultConfigDefaultsKillSwitch");
86 }
87
88 // Identify the native processing rate that best handles a sample rate.
SuitableProcessRate(int minimum_rate,int max_splitting_rate,bool band_splitting_required)89 int SuitableProcessRate(int minimum_rate,
90 int max_splitting_rate,
91 bool band_splitting_required) {
92 const int uppermost_native_rate =
93 band_splitting_required ? max_splitting_rate : 48000;
94 for (auto rate : {16000, 32000, 48000}) {
95 if (rate >= uppermost_native_rate) {
96 return uppermost_native_rate;
97 }
98 if (rate >= minimum_rate) {
99 return rate;
100 }
101 }
102 RTC_NOTREACHED();
103 return uppermost_native_rate;
104 }
105
Agc1ConfigModeToInterfaceMode(AudioProcessing::Config::GainController1::Mode mode)106 GainControl::Mode Agc1ConfigModeToInterfaceMode(
107 AudioProcessing::Config::GainController1::Mode mode) {
108 using Agc1Config = AudioProcessing::Config::GainController1;
109 switch (mode) {
110 case Agc1Config::kAdaptiveAnalog:
111 return GainControl::kAdaptiveAnalog;
112 case Agc1Config::kAdaptiveDigital:
113 return GainControl::kAdaptiveDigital;
114 case Agc1Config::kFixedDigital:
115 return GainControl::kFixedDigital;
116 }
117 }
118
// Upper bounds on the length of a frame of samples handed from the render side
// to the capture side (these do not apply to AEC3).
constexpr size_t kMaxAllowedValuesOfSamplesPerBand = 160;
constexpr size_t kMaxAllowedValuesOfSamplesPerFrame = 480;

// Upper bound on the number of frames buffered in the render queue.
// TODO(peah): Decrease this once we properly handle hugely unbalanced
// reverse and forward call numbers.
constexpr size_t kMaxNumFramesToBuffer = 100;
128 } // namespace
129
130 // Throughout webrtc, it's assumed that success is represented by zero.
131 static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
132
SubmoduleStates(bool capture_post_processor_enabled,bool render_pre_processor_enabled,bool capture_analyzer_enabled)133 AudioProcessingImpl::SubmoduleStates::SubmoduleStates(
134 bool capture_post_processor_enabled,
135 bool render_pre_processor_enabled,
136 bool capture_analyzer_enabled)
137 : capture_post_processor_enabled_(capture_post_processor_enabled),
138 render_pre_processor_enabled_(render_pre_processor_enabled),
139 capture_analyzer_enabled_(capture_analyzer_enabled) {}
140
Update(bool high_pass_filter_enabled,bool mobile_echo_controller_enabled,bool residual_echo_detector_enabled,bool noise_suppressor_enabled,bool adaptive_gain_controller_enabled,bool gain_controller2_enabled,bool pre_amplifier_enabled,bool echo_controller_enabled,bool voice_detector_enabled,bool transient_suppressor_enabled)141 bool AudioProcessingImpl::SubmoduleStates::Update(
142 bool high_pass_filter_enabled,
143 bool mobile_echo_controller_enabled,
144 bool residual_echo_detector_enabled,
145 bool noise_suppressor_enabled,
146 bool adaptive_gain_controller_enabled,
147 bool gain_controller2_enabled,
148 bool pre_amplifier_enabled,
149 bool echo_controller_enabled,
150 bool voice_detector_enabled,
151 bool transient_suppressor_enabled) {
152 bool changed = false;
153 changed |= (high_pass_filter_enabled != high_pass_filter_enabled_);
154 changed |=
155 (mobile_echo_controller_enabled != mobile_echo_controller_enabled_);
156 changed |=
157 (residual_echo_detector_enabled != residual_echo_detector_enabled_);
158 changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
159 changed |=
160 (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
161 changed |= (gain_controller2_enabled != gain_controller2_enabled_);
162 changed |= (pre_amplifier_enabled_ != pre_amplifier_enabled);
163 changed |= (echo_controller_enabled != echo_controller_enabled_);
164 changed |= (voice_detector_enabled != voice_detector_enabled_);
165 changed |= (transient_suppressor_enabled != transient_suppressor_enabled_);
166 if (changed) {
167 high_pass_filter_enabled_ = high_pass_filter_enabled;
168 mobile_echo_controller_enabled_ = mobile_echo_controller_enabled;
169 residual_echo_detector_enabled_ = residual_echo_detector_enabled;
170 noise_suppressor_enabled_ = noise_suppressor_enabled;
171 adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
172 gain_controller2_enabled_ = gain_controller2_enabled;
173 pre_amplifier_enabled_ = pre_amplifier_enabled;
174 echo_controller_enabled_ = echo_controller_enabled;
175 voice_detector_enabled_ = voice_detector_enabled;
176 transient_suppressor_enabled_ = transient_suppressor_enabled;
177 }
178
179 changed |= first_update_;
180 first_update_ = false;
181 return changed;
182 }
183
CaptureMultiBandSubModulesActive() const184 bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandSubModulesActive()
185 const {
186 return CaptureMultiBandProcessingPresent() || voice_detector_enabled_;
187 }
188
CaptureMultiBandProcessingPresent() const189 bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingPresent()
190 const {
191 // If echo controller is present, assume it performs active processing.
192 return CaptureMultiBandProcessingActive(/*ec_processing_active=*/true);
193 }
194
CaptureMultiBandProcessingActive(bool ec_processing_active) const195 bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingActive(
196 bool ec_processing_active) const {
197 return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
198 noise_suppressor_enabled_ || adaptive_gain_controller_enabled_ ||
199 (echo_controller_enabled_ && ec_processing_active);
200 }
201
CaptureFullBandProcessingActive() const202 bool AudioProcessingImpl::SubmoduleStates::CaptureFullBandProcessingActive()
203 const {
204 return gain_controller2_enabled_ || capture_post_processor_enabled_ ||
205 pre_amplifier_enabled_;
206 }
207
CaptureAnalyzerActive() const208 bool AudioProcessingImpl::SubmoduleStates::CaptureAnalyzerActive() const {
209 return capture_analyzer_enabled_;
210 }
211
RenderMultiBandSubModulesActive() const212 bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandSubModulesActive()
213 const {
214 return RenderMultiBandProcessingActive() || mobile_echo_controller_enabled_ ||
215 adaptive_gain_controller_enabled_ || echo_controller_enabled_;
216 }
217
RenderFullBandProcessingActive() const218 bool AudioProcessingImpl::SubmoduleStates::RenderFullBandProcessingActive()
219 const {
220 return render_pre_processor_enabled_;
221 }
222
RenderMultiBandProcessingActive() const223 bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandProcessingActive()
224 const {
225 return false;
226 }
227
HighPassFilteringRequired() const228 bool AudioProcessingImpl::SubmoduleStates::HighPassFilteringRequired() const {
229 return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
230 noise_suppressor_enabled_;
231 }
232
AudioProcessingImpl(const webrtc::Config & config)233 AudioProcessingImpl::AudioProcessingImpl(const webrtc::Config& config)
234 : AudioProcessingImpl(config,
235 /*capture_post_processor=*/nullptr,
236 /*render_pre_processor=*/nullptr,
237 /*echo_control_factory=*/nullptr,
238 /*echo_detector=*/nullptr,
239 /*capture_analyzer=*/nullptr) {}
240
241 int AudioProcessingImpl::instance_count_ = 0;
242
AudioProcessingImpl(const webrtc::Config & config,std::unique_ptr<CustomProcessing> capture_post_processor,std::unique_ptr<CustomProcessing> render_pre_processor,std::unique_ptr<EchoControlFactory> echo_control_factory,rtc::scoped_refptr<EchoDetector> echo_detector,std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)243 AudioProcessingImpl::AudioProcessingImpl(
244 const webrtc::Config& config,
245 std::unique_ptr<CustomProcessing> capture_post_processor,
246 std::unique_ptr<CustomProcessing> render_pre_processor,
247 std::unique_ptr<EchoControlFactory> echo_control_factory,
248 rtc::scoped_refptr<EchoDetector> echo_detector,
249 std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)
250 : data_dumper_(
251 new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
252 use_setup_specific_default_aec3_config_(
253 UseSetupSpecificDefaultAec3Congfig()),
254 capture_runtime_settings_(kRuntimeSettingQueueSize),
255 render_runtime_settings_(kRuntimeSettingQueueSize),
256 capture_runtime_settings_enqueuer_(&capture_runtime_settings_),
257 render_runtime_settings_enqueuer_(&render_runtime_settings_),
258 echo_control_factory_(std::move(echo_control_factory)),
259 submodule_states_(!!capture_post_processor,
260 !!render_pre_processor,
261 !!capture_analyzer),
262 submodules_(std::move(capture_post_processor),
263 std::move(render_pre_processor),
264 std::move(echo_detector),
265 std::move(capture_analyzer)),
266 constants_(!field_trial::IsEnabled(
267 "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
268 !field_trial::IsEnabled(
269 "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
270 EnforceSplitBandHpf()),
271 capture_nonlocked_() {
272 RTC_LOG(LS_INFO) << "Injected APM submodules:"
273 "\nEcho control factory: "
274 << !!echo_control_factory_
275 << "\nEcho detector: " << !!submodules_.echo_detector
276 << "\nCapture analyzer: " << !!submodules_.capture_analyzer
277 << "\nCapture post processor: "
278 << !!submodules_.capture_post_processor
279 << "\nRender pre processor: "
280 << !!submodules_.render_pre_processor;
281
282 // Mark Echo Controller enabled if a factory is injected.
283 capture_nonlocked_.echo_controller_enabled =
284 static_cast<bool>(echo_control_factory_);
285
286 // If no echo detector is injected, use the ResidualEchoDetector.
287 if (!submodules_.echo_detector) {
288 submodules_.echo_detector =
289 new rtc::RefCountedObject<ResidualEchoDetector>();
290 }
291
292 #if !(defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS))
293 // TODO(webrtc:5298): Remove once the use of ExperimentalNs has been
294 // deprecated.
295 config_.transient_suppression.enabled = config.Get<ExperimentalNs>().enabled;
296
297 // TODO(webrtc:5298): Remove once the use of ExperimentalAgc has been
298 // deprecated.
299 config_.gain_controller1.analog_gain_controller.enabled =
300 config.Get<ExperimentalAgc>().enabled;
301 config_.gain_controller1.analog_gain_controller.startup_min_volume =
302 config.Get<ExperimentalAgc>().startup_min_volume;
303 config_.gain_controller1.analog_gain_controller.clipped_level_min =
304 config.Get<ExperimentalAgc>().clipped_level_min;
305 config_.gain_controller1.analog_gain_controller.enable_agc2_level_estimator =
306 config.Get<ExperimentalAgc>().enabled_agc2_level_estimator;
307 config_.gain_controller1.analog_gain_controller.enable_digital_adaptive =
308 !config.Get<ExperimentalAgc>().digital_adaptive_disabled;
309 #endif
310 }
311
312 AudioProcessingImpl::~AudioProcessingImpl() = default;
313
Initialize()314 int AudioProcessingImpl::Initialize() {
315 // Run in a single-threaded manner during initialization.
316 MutexLock lock_render(&mutex_render_);
317 MutexLock lock_capture(&mutex_capture_);
318 return InitializeLocked();
319 }
320
Initialize(int capture_input_sample_rate_hz,int capture_output_sample_rate_hz,int render_input_sample_rate_hz,ChannelLayout capture_input_layout,ChannelLayout capture_output_layout,ChannelLayout render_input_layout)321 int AudioProcessingImpl::Initialize(int capture_input_sample_rate_hz,
322 int capture_output_sample_rate_hz,
323 int render_input_sample_rate_hz,
324 ChannelLayout capture_input_layout,
325 ChannelLayout capture_output_layout,
326 ChannelLayout render_input_layout) {
327 const ProcessingConfig processing_config = {
328 {{capture_input_sample_rate_hz, ChannelsFromLayout(capture_input_layout),
329 LayoutHasKeyboard(capture_input_layout)},
330 {capture_output_sample_rate_hz,
331 ChannelsFromLayout(capture_output_layout),
332 LayoutHasKeyboard(capture_output_layout)},
333 {render_input_sample_rate_hz, ChannelsFromLayout(render_input_layout),
334 LayoutHasKeyboard(render_input_layout)},
335 {render_input_sample_rate_hz, ChannelsFromLayout(render_input_layout),
336 LayoutHasKeyboard(render_input_layout)}}};
337
338 return Initialize(processing_config);
339 }
340
Initialize(const ProcessingConfig & processing_config)341 int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
342 // Run in a single-threaded manner during initialization.
343 MutexLock lock_render(&mutex_render_);
344 MutexLock lock_capture(&mutex_capture_);
345 return InitializeLocked(processing_config);
346 }
347
MaybeInitializeRender(const ProcessingConfig & processing_config)348 int AudioProcessingImpl::MaybeInitializeRender(
349 const ProcessingConfig& processing_config) {
350 // Called from both threads. Thread check is therefore not possible.
351 if (processing_config == formats_.api_format) {
352 return kNoError;
353 }
354
355 MutexLock lock_capture(&mutex_capture_);
356 return InitializeLocked(processing_config);
357 }
358
InitializeLocked()359 int AudioProcessingImpl::InitializeLocked() {
360 UpdateActiveSubmoduleStates();
361
362 const int render_audiobuffer_sample_rate_hz =
363 formats_.api_format.reverse_output_stream().num_frames() == 0
364 ? formats_.render_processing_format.sample_rate_hz()
365 : formats_.api_format.reverse_output_stream().sample_rate_hz();
366 if (formats_.api_format.reverse_input_stream().num_channels() > 0) {
367 render_.render_audio.reset(new AudioBuffer(
368 formats_.api_format.reverse_input_stream().sample_rate_hz(),
369 formats_.api_format.reverse_input_stream().num_channels(),
370 formats_.render_processing_format.sample_rate_hz(),
371 formats_.render_processing_format.num_channels(),
372 render_audiobuffer_sample_rate_hz,
373 formats_.render_processing_format.num_channels()));
374 if (formats_.api_format.reverse_input_stream() !=
375 formats_.api_format.reverse_output_stream()) {
376 render_.render_converter = AudioConverter::Create(
377 formats_.api_format.reverse_input_stream().num_channels(),
378 formats_.api_format.reverse_input_stream().num_frames(),
379 formats_.api_format.reverse_output_stream().num_channels(),
380 formats_.api_format.reverse_output_stream().num_frames());
381 } else {
382 render_.render_converter.reset(nullptr);
383 }
384 } else {
385 render_.render_audio.reset(nullptr);
386 render_.render_converter.reset(nullptr);
387 }
388
389 capture_.capture_audio.reset(new AudioBuffer(
390 formats_.api_format.input_stream().sample_rate_hz(),
391 formats_.api_format.input_stream().num_channels(),
392 capture_nonlocked_.capture_processing_format.sample_rate_hz(),
393 formats_.api_format.output_stream().num_channels(),
394 formats_.api_format.output_stream().sample_rate_hz(),
395 formats_.api_format.output_stream().num_channels()));
396
397 if (capture_nonlocked_.capture_processing_format.sample_rate_hz() <
398 formats_.api_format.output_stream().sample_rate_hz() &&
399 formats_.api_format.output_stream().sample_rate_hz() == 48000) {
400 capture_.capture_fullband_audio.reset(
401 new AudioBuffer(formats_.api_format.input_stream().sample_rate_hz(),
402 formats_.api_format.input_stream().num_channels(),
403 formats_.api_format.output_stream().sample_rate_hz(),
404 formats_.api_format.output_stream().num_channels(),
405 formats_.api_format.output_stream().sample_rate_hz(),
406 formats_.api_format.output_stream().num_channels()));
407 } else {
408 capture_.capture_fullband_audio.reset();
409 }
410
411 AllocateRenderQueue();
412
413 InitializeGainController1();
414 InitializeTransientSuppressor();
415 InitializeHighPassFilter(true);
416 InitializeVoiceDetector();
417 InitializeResidualEchoDetector();
418 InitializeEchoController();
419 InitializeGainController2();
420 InitializeNoiseSuppressor();
421 InitializeAnalyzer();
422 InitializePostProcessor();
423 InitializePreProcessor();
424
425 if (aec_dump_) {
426 aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis());
427 }
428 return kNoError;
429 }
430
InitializeLocked(const ProcessingConfig & config)431 int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
432 UpdateActiveSubmoduleStates();
433
434 for (const auto& stream : config.streams) {
435 if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) {
436 return kBadSampleRateError;
437 }
438 }
439
440 const size_t num_in_channels = config.input_stream().num_channels();
441 const size_t num_out_channels = config.output_stream().num_channels();
442
443 // Need at least one input channel.
444 // Need either one output channel or as many outputs as there are inputs.
445 if (num_in_channels == 0 ||
446 !(num_out_channels == 1 || num_out_channels == num_in_channels)) {
447 return kBadNumberChannelsError;
448 }
449
450 formats_.api_format = config;
451
452 // Choose maximum rate to use for the split filtering.
453 RTC_DCHECK(config_.pipeline.maximum_internal_processing_rate == 48000 ||
454 config_.pipeline.maximum_internal_processing_rate == 32000);
455 int max_splitting_rate = 48000;
456 if (config_.pipeline.maximum_internal_processing_rate == 32000) {
457 max_splitting_rate = config_.pipeline.maximum_internal_processing_rate;
458 }
459
460 int capture_processing_rate = SuitableProcessRate(
461 std::min(formats_.api_format.input_stream().sample_rate_hz(),
462 formats_.api_format.output_stream().sample_rate_hz()),
463 max_splitting_rate,
464 submodule_states_.CaptureMultiBandSubModulesActive() ||
465 submodule_states_.RenderMultiBandSubModulesActive());
466 RTC_DCHECK_NE(8000, capture_processing_rate);
467
468 capture_nonlocked_.capture_processing_format =
469 StreamConfig(capture_processing_rate);
470
471 int render_processing_rate;
472 if (!capture_nonlocked_.echo_controller_enabled) {
473 render_processing_rate = SuitableProcessRate(
474 std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(),
475 formats_.api_format.reverse_output_stream().sample_rate_hz()),
476 max_splitting_rate,
477 submodule_states_.CaptureMultiBandSubModulesActive() ||
478 submodule_states_.RenderMultiBandSubModulesActive());
479 } else {
480 render_processing_rate = capture_processing_rate;
481 }
482
483 // If the forward sample rate is 8 kHz, the render stream is also processed
484 // at this rate.
485 if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
486 kSampleRate8kHz) {
487 render_processing_rate = kSampleRate8kHz;
488 } else {
489 render_processing_rate =
490 std::max(render_processing_rate, static_cast<int>(kSampleRate16kHz));
491 }
492
493 RTC_DCHECK_NE(8000, render_processing_rate);
494
495 if (submodule_states_.RenderMultiBandSubModulesActive()) {
496 // By default, downmix the render stream to mono for analysis. This has been
497 // demonstrated to work well for AEC in most practical scenarios.
498 const bool multi_channel_render = config_.pipeline.multi_channel_render &&
499 constants_.multi_channel_render_support;
500 int render_processing_num_channels =
501 multi_channel_render
502 ? formats_.api_format.reverse_input_stream().num_channels()
503 : 1;
504 formats_.render_processing_format =
505 StreamConfig(render_processing_rate, render_processing_num_channels);
506 } else {
507 formats_.render_processing_format = StreamConfig(
508 formats_.api_format.reverse_input_stream().sample_rate_hz(),
509 formats_.api_format.reverse_input_stream().num_channels());
510 }
511
512 if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
513 kSampleRate32kHz ||
514 capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
515 kSampleRate48kHz) {
516 capture_nonlocked_.split_rate = kSampleRate16kHz;
517 } else {
518 capture_nonlocked_.split_rate =
519 capture_nonlocked_.capture_processing_format.sample_rate_hz();
520 }
521
522 return InitializeLocked();
523 }
524
ApplyConfig(const AudioProcessing::Config & config)525 void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
526 RTC_LOG(LS_INFO) << "AudioProcessing::ApplyConfig: " << config.ToString();
527
528 // Run in a single-threaded manner when applying the settings.
529 MutexLock lock_render(&mutex_render_);
530 MutexLock lock_capture(&mutex_capture_);
531
532 const bool pipeline_config_changed =
533 config_.pipeline.multi_channel_render !=
534 config.pipeline.multi_channel_render ||
535 config_.pipeline.multi_channel_capture !=
536 config.pipeline.multi_channel_capture ||
537 config_.pipeline.maximum_internal_processing_rate !=
538 config.pipeline.maximum_internal_processing_rate;
539
540 const bool aec_config_changed =
541 config_.echo_canceller.enabled != config.echo_canceller.enabled ||
542 config_.echo_canceller.mobile_mode != config.echo_canceller.mobile_mode;
543
544 const bool agc1_config_changed =
545 config_.gain_controller1.enabled != config.gain_controller1.enabled ||
546 config_.gain_controller1.mode != config.gain_controller1.mode ||
547 config_.gain_controller1.target_level_dbfs !=
548 config.gain_controller1.target_level_dbfs ||
549 config_.gain_controller1.compression_gain_db !=
550 config.gain_controller1.compression_gain_db ||
551 config_.gain_controller1.enable_limiter !=
552 config.gain_controller1.enable_limiter ||
553 config_.gain_controller1.analog_level_minimum !=
554 config.gain_controller1.analog_level_minimum ||
555 config_.gain_controller1.analog_level_maximum !=
556 config.gain_controller1.analog_level_maximum ||
557 config_.gain_controller1.analog_gain_controller.enabled !=
558 config.gain_controller1.analog_gain_controller.enabled ||
559 config_.gain_controller1.analog_gain_controller.startup_min_volume !=
560 config.gain_controller1.analog_gain_controller.startup_min_volume ||
561 config_.gain_controller1.analog_gain_controller.clipped_level_min !=
562 config.gain_controller1.analog_gain_controller.clipped_level_min ||
563 config_.gain_controller1.analog_gain_controller
564 .enable_agc2_level_estimator !=
565 config.gain_controller1.analog_gain_controller
566 .enable_agc2_level_estimator ||
567 config_.gain_controller1.analog_gain_controller.enable_digital_adaptive !=
568 config.gain_controller1.analog_gain_controller
569 .enable_digital_adaptive;
570
571 const bool agc2_config_changed =
572 config_.gain_controller2.enabled != config.gain_controller2.enabled;
573
574 const bool voice_detection_config_changed =
575 config_.voice_detection.enabled != config.voice_detection.enabled;
576
577 const bool ns_config_changed =
578 config_.noise_suppression.enabled != config.noise_suppression.enabled ||
579 config_.noise_suppression.level != config.noise_suppression.level;
580
581 const bool ts_config_changed = config_.transient_suppression.enabled !=
582 config.transient_suppression.enabled;
583
584 const bool pre_amplifier_config_changed =
585 config_.pre_amplifier.enabled != config.pre_amplifier.enabled ||
586 config_.pre_amplifier.fixed_gain_factor !=
587 config.pre_amplifier.fixed_gain_factor;
588
589 config_ = config;
590
591 if (aec_config_changed) {
592 InitializeEchoController();
593 }
594
595 if (ns_config_changed) {
596 InitializeNoiseSuppressor();
597 }
598
599 if (ts_config_changed) {
600 InitializeTransientSuppressor();
601 }
602
603 InitializeHighPassFilter(false);
604
605 if (agc1_config_changed) {
606 InitializeGainController1();
607 }
608
609 const bool config_ok = GainController2::Validate(config_.gain_controller2);
610 if (!config_ok) {
611 RTC_LOG(LS_ERROR) << "AudioProcessing module config error\n"
612 "Gain Controller 2: "
613 << GainController2::ToString(config_.gain_controller2)
614 << "\nReverting to default parameter set";
615 config_.gain_controller2 = AudioProcessing::Config::GainController2();
616 }
617
618 if (agc2_config_changed) {
619 InitializeGainController2();
620 }
621
622 if (pre_amplifier_config_changed) {
623 InitializePreAmplifier();
624 }
625
626 if (config_.level_estimation.enabled && !submodules_.output_level_estimator) {
627 submodules_.output_level_estimator = std::make_unique<LevelEstimator>();
628 }
629
630 if (voice_detection_config_changed) {
631 InitializeVoiceDetector();
632 }
633
634 // Reinitialization must happen after all submodule configuration to avoid
635 // additional reinitializations on the next capture / render processing call.
636 if (pipeline_config_changed) {
637 InitializeLocked(formats_.api_format);
638 }
639 }
640
641 // TODO(webrtc:5298): Remove.
SetExtraOptions(const webrtc::Config & config)642 void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) {}
643
OverrideSubmoduleCreationForTesting(const ApmSubmoduleCreationOverrides & overrides)644 void AudioProcessingImpl::OverrideSubmoduleCreationForTesting(
645 const ApmSubmoduleCreationOverrides& overrides) {
646 MutexLock lock(&mutex_capture_);
647 submodule_creation_overrides_ = overrides;
648 }
649
proc_sample_rate_hz() const650 int AudioProcessingImpl::proc_sample_rate_hz() const {
651 // Used as callback from submodules, hence locking is not allowed.
652 return capture_nonlocked_.capture_processing_format.sample_rate_hz();
653 }
654
proc_fullband_sample_rate_hz() const655 int AudioProcessingImpl::proc_fullband_sample_rate_hz() const {
656 return capture_.capture_fullband_audio
657 ? capture_.capture_fullband_audio->num_frames() * 100
658 : capture_nonlocked_.capture_processing_format.sample_rate_hz();
659 }
660
proc_split_sample_rate_hz() const661 int AudioProcessingImpl::proc_split_sample_rate_hz() const {
662 // Used as callback from submodules, hence locking is not allowed.
663 return capture_nonlocked_.split_rate;
664 }
665
num_reverse_channels() const666 size_t AudioProcessingImpl::num_reverse_channels() const {
667 // Used as callback from submodules, hence locking is not allowed.
668 return formats_.render_processing_format.num_channels();
669 }
670
num_input_channels() const671 size_t AudioProcessingImpl::num_input_channels() const {
672 // Used as callback from submodules, hence locking is not allowed.
673 return formats_.api_format.input_stream().num_channels();
674 }
675
num_proc_channels() const676 size_t AudioProcessingImpl::num_proc_channels() const {
677 // Used as callback from submodules, hence locking is not allowed.
678 const bool multi_channel_capture = config_.pipeline.multi_channel_capture &&
679 constants_.multi_channel_capture_support;
680 if (capture_nonlocked_.echo_controller_enabled && !multi_channel_capture) {
681 return 1;
682 }
683 return num_output_channels();
684 }
685
num_output_channels() const686 size_t AudioProcessingImpl::num_output_channels() const {
687 // Used as callback from submodules, hence locking is not allowed.
688 return formats_.api_format.output_stream().num_channels();
689 }
690
set_output_will_be_muted(bool muted)691 void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
692 MutexLock lock(&mutex_capture_);
693 capture_.output_will_be_muted = muted;
694 if (submodules_.agc_manager.get()) {
695 submodules_.agc_manager->SetCaptureMuted(capture_.output_will_be_muted);
696 }
697 }
698
SetRuntimeSetting(RuntimeSetting setting)699 void AudioProcessingImpl::SetRuntimeSetting(RuntimeSetting setting) {
700 switch (setting.type()) {
701 case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
702 case RuntimeSetting::Type::kPlayoutAudioDeviceChange:
703 render_runtime_settings_enqueuer_.Enqueue(setting);
704 return;
705 case RuntimeSetting::Type::kCapturePreGain:
706 case RuntimeSetting::Type::kCaptureCompressionGain:
707 case RuntimeSetting::Type::kCaptureFixedPostGain:
708 capture_runtime_settings_enqueuer_.Enqueue(setting);
709 return;
710 case RuntimeSetting::Type::kPlayoutVolumeChange:
711 capture_runtime_settings_enqueuer_.Enqueue(setting);
712 render_runtime_settings_enqueuer_.Enqueue(setting);
713 return;
714 case RuntimeSetting::Type::kNotSpecified:
715 RTC_NOTREACHED();
716 return;
717 }
718 // The language allows the enum to have a non-enumerator
719 // value. Check that this doesn't happen.
720 RTC_NOTREACHED();
721 }
722
RuntimeSettingEnqueuer(SwapQueue<RuntimeSetting> * runtime_settings)723 AudioProcessingImpl::RuntimeSettingEnqueuer::RuntimeSettingEnqueuer(
724 SwapQueue<RuntimeSetting>* runtime_settings)
725 : runtime_settings_(*runtime_settings) {
726 RTC_DCHECK(runtime_settings);
727 }
728
729 AudioProcessingImpl::RuntimeSettingEnqueuer::~RuntimeSettingEnqueuer() =
730 default;
731
Enqueue(RuntimeSetting setting)732 void AudioProcessingImpl::RuntimeSettingEnqueuer::Enqueue(
733 RuntimeSetting setting) {
734 size_t remaining_attempts = 10;
735 while (!runtime_settings_.Insert(&setting) && remaining_attempts-- > 0) {
736 RuntimeSetting setting_to_discard;
737 if (runtime_settings_.Remove(&setting_to_discard))
738 RTC_LOG(LS_ERROR)
739 << "The runtime settings queue is full. Oldest setting discarded.";
740 }
741 if (remaining_attempts == 0)
742 RTC_LOG(LS_ERROR) << "Cannot enqueue a new runtime setting.";
743 }
744
MaybeInitializeCapture(const StreamConfig & input_config,const StreamConfig & output_config)745 int AudioProcessingImpl::MaybeInitializeCapture(
746 const StreamConfig& input_config,
747 const StreamConfig& output_config) {
748 ProcessingConfig processing_config;
749 bool reinitialization_required = false;
750 {
751 // Acquire the capture lock in order to access api_format. The lock is
752 // released immediately, as we may need to acquire the render lock as part
753 // of the conditional reinitialization.
754 MutexLock lock_capture(&mutex_capture_);
755 processing_config = formats_.api_format;
756 reinitialization_required = UpdateActiveSubmoduleStates();
757 }
758
759 if (processing_config.input_stream() != input_config) {
760 processing_config.input_stream() = input_config;
761 reinitialization_required = true;
762 }
763
764 if (processing_config.output_stream() != output_config) {
765 processing_config.output_stream() = output_config;
766 reinitialization_required = true;
767 }
768
769 if (reinitialization_required) {
770 MutexLock lock_render(&mutex_render_);
771 MutexLock lock_capture(&mutex_capture_);
772 RETURN_ON_ERR(InitializeLocked(processing_config));
773 }
774 return kNoError;
775 }
776
ProcessStream(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config,float * const * dest)777 int AudioProcessingImpl::ProcessStream(const float* const* src,
778 const StreamConfig& input_config,
779 const StreamConfig& output_config,
780 float* const* dest) {
781 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
782 if (!src || !dest) {
783 return kNullPointerError;
784 }
785
786 RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
787
788 MutexLock lock_capture(&mutex_capture_);
789
790 if (aec_dump_) {
791 RecordUnprocessedCaptureStream(src);
792 }
793
794 capture_.keyboard_info.Extract(src, formats_.api_format.input_stream());
795 capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
796 if (capture_.capture_fullband_audio) {
797 capture_.capture_fullband_audio->CopyFrom(
798 src, formats_.api_format.input_stream());
799 }
800 RETURN_ON_ERR(ProcessCaptureStreamLocked());
801 if (capture_.capture_fullband_audio) {
802 capture_.capture_fullband_audio->CopyTo(formats_.api_format.output_stream(),
803 dest);
804 } else {
805 capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
806 }
807
808 if (aec_dump_) {
809 RecordProcessedCaptureStream(dest);
810 }
811 return kNoError;
812 }
813
HandleCaptureRuntimeSettings()814 void AudioProcessingImpl::HandleCaptureRuntimeSettings() {
815 RuntimeSetting setting;
816 while (capture_runtime_settings_.Remove(&setting)) {
817 if (aec_dump_) {
818 aec_dump_->WriteRuntimeSetting(setting);
819 }
820 switch (setting.type()) {
821 case RuntimeSetting::Type::kCapturePreGain:
822 if (config_.pre_amplifier.enabled) {
823 float value;
824 setting.GetFloat(&value);
825 config_.pre_amplifier.fixed_gain_factor = value;
826 submodules_.pre_amplifier->SetGainFactor(value);
827 }
828 // TODO(bugs.chromium.org/9138): Log setting handling by Aec Dump.
829 break;
830 case RuntimeSetting::Type::kCaptureCompressionGain: {
831 if (!submodules_.agc_manager) {
832 float value;
833 setting.GetFloat(&value);
834 int int_value = static_cast<int>(value + .5f);
835 config_.gain_controller1.compression_gain_db = int_value;
836 if (submodules_.gain_control) {
837 int error =
838 submodules_.gain_control->set_compression_gain_db(int_value);
839 RTC_DCHECK_EQ(kNoError, error);
840 }
841 }
842 break;
843 }
844 case RuntimeSetting::Type::kCaptureFixedPostGain: {
845 if (submodules_.gain_controller2) {
846 float value;
847 setting.GetFloat(&value);
848 config_.gain_controller2.fixed_digital.gain_db = value;
849 submodules_.gain_controller2->ApplyConfig(config_.gain_controller2);
850 }
851 break;
852 }
853 case RuntimeSetting::Type::kPlayoutVolumeChange: {
854 int value;
855 setting.GetInt(&value);
856 capture_.playout_volume = value;
857 break;
858 }
859 case RuntimeSetting::Type::kPlayoutAudioDeviceChange:
860 RTC_NOTREACHED();
861 break;
862 case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
863 RTC_NOTREACHED();
864 break;
865 case RuntimeSetting::Type::kNotSpecified:
866 RTC_NOTREACHED();
867 break;
868 }
869 }
870 }
871
HandleRenderRuntimeSettings()872 void AudioProcessingImpl::HandleRenderRuntimeSettings() {
873 RuntimeSetting setting;
874 while (render_runtime_settings_.Remove(&setting)) {
875 if (aec_dump_) {
876 aec_dump_->WriteRuntimeSetting(setting);
877 }
878 switch (setting.type()) {
879 case RuntimeSetting::Type::kPlayoutAudioDeviceChange: // fall-through
880 case RuntimeSetting::Type::kPlayoutVolumeChange: // fall-through
881 case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
882 if (submodules_.render_pre_processor) {
883 submodules_.render_pre_processor->SetRuntimeSetting(setting);
884 }
885 break;
886 case RuntimeSetting::Type::kCapturePreGain: // fall-through
887 case RuntimeSetting::Type::kCaptureCompressionGain: // fall-through
888 case RuntimeSetting::Type::kCaptureFixedPostGain: // fall-through
889 case RuntimeSetting::Type::kNotSpecified:
890 RTC_NOTREACHED();
891 break;
892 }
893 }
894 }
895
QueueBandedRenderAudio(AudioBuffer * audio)896 void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) {
897 RTC_DCHECK_GE(160, audio->num_frames_per_band());
898
899 if (submodules_.echo_control_mobile) {
900 EchoControlMobileImpl::PackRenderAudioBuffer(audio, num_output_channels(),
901 num_reverse_channels(),
902 &aecm_render_queue_buffer_);
903 RTC_DCHECK(aecm_render_signal_queue_);
904 // Insert the samples into the queue.
905 if (!aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_)) {
906 // The data queue is full and needs to be emptied.
907 EmptyQueuedRenderAudio();
908
909 // Retry the insert (should always work).
910 bool result =
911 aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_);
912 RTC_DCHECK(result);
913 }
914 }
915
916 if (!submodules_.agc_manager && submodules_.gain_control) {
917 GainControlImpl::PackRenderAudioBuffer(*audio, &agc_render_queue_buffer_);
918 // Insert the samples into the queue.
919 if (!agc_render_signal_queue_->Insert(&agc_render_queue_buffer_)) {
920 // The data queue is full and needs to be emptied.
921 EmptyQueuedRenderAudio();
922
923 // Retry the insert (should always work).
924 bool result = agc_render_signal_queue_->Insert(&agc_render_queue_buffer_);
925 RTC_DCHECK(result);
926 }
927 }
928 }
929
QueueNonbandedRenderAudio(AudioBuffer * audio)930 void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) {
931 ResidualEchoDetector::PackRenderAudioBuffer(audio, &red_render_queue_buffer_);
932
933 // Insert the samples into the queue.
934 if (!red_render_signal_queue_->Insert(&red_render_queue_buffer_)) {
935 // The data queue is full and needs to be emptied.
936 EmptyQueuedRenderAudio();
937
938 // Retry the insert (should always work).
939 bool result = red_render_signal_queue_->Insert(&red_render_queue_buffer_);
940 RTC_DCHECK(result);
941 }
942 }
943
AllocateRenderQueue()944 void AudioProcessingImpl::AllocateRenderQueue() {
945 const size_t new_agc_render_queue_element_max_size =
946 std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerBand);
947
948 const size_t new_red_render_queue_element_max_size =
949 std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
950
951 // Reallocate the queues if the queue item sizes are too small to fit the
952 // data to put in the queues.
953
954 if (agc_render_queue_element_max_size_ <
955 new_agc_render_queue_element_max_size) {
956 agc_render_queue_element_max_size_ = new_agc_render_queue_element_max_size;
957
958 std::vector<int16_t> template_queue_element(
959 agc_render_queue_element_max_size_);
960
961 agc_render_signal_queue_.reset(
962 new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
963 kMaxNumFramesToBuffer, template_queue_element,
964 RenderQueueItemVerifier<int16_t>(
965 agc_render_queue_element_max_size_)));
966
967 agc_render_queue_buffer_.resize(agc_render_queue_element_max_size_);
968 agc_capture_queue_buffer_.resize(agc_render_queue_element_max_size_);
969 } else {
970 agc_render_signal_queue_->Clear();
971 }
972
973 if (red_render_queue_element_max_size_ <
974 new_red_render_queue_element_max_size) {
975 red_render_queue_element_max_size_ = new_red_render_queue_element_max_size;
976
977 std::vector<float> template_queue_element(
978 red_render_queue_element_max_size_);
979
980 red_render_signal_queue_.reset(
981 new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>(
982 kMaxNumFramesToBuffer, template_queue_element,
983 RenderQueueItemVerifier<float>(
984 red_render_queue_element_max_size_)));
985
986 red_render_queue_buffer_.resize(red_render_queue_element_max_size_);
987 red_capture_queue_buffer_.resize(red_render_queue_element_max_size_);
988 } else {
989 red_render_signal_queue_->Clear();
990 }
991 }
992
// Acquires the capture lock and consumes all queued render audio. Used by the
// render-side queueing functions when a render queue is full.
void AudioProcessingImpl::EmptyQueuedRenderAudio() {
  MutexLock lock_capture(&mutex_capture_);
  EmptyQueuedRenderAudioLocked();
}
997
// Removes every queued render audio item from the AECM, AGC and residual echo
// detector queues and hands each item to the corresponding submodule. The
// callers in this file hold the capture lock when invoking it.
void AudioProcessingImpl::EmptyQueuedRenderAudioLocked() {
  // Mobile echo controller queue; only drained when the submodule exists.
  if (submodules_.echo_control_mobile) {
    RTC_DCHECK(aecm_render_signal_queue_);
    while (aecm_render_signal_queue_->Remove(&aecm_capture_queue_buffer_)) {
      submodules_.echo_control_mobile->ProcessRenderAudio(
          aecm_capture_queue_buffer_);
    }
  }

  // Gain controller queue; only drained when the submodule exists.
  if (submodules_.gain_control) {
    while (agc_render_signal_queue_->Remove(&agc_capture_queue_buffer_)) {
      submodules_.gain_control->ProcessRenderAudio(agc_capture_queue_buffer_);
    }
  }

  // Residual echo detector queue. This queue is always polled; the detector
  // is expected to exist whenever the queue holds data.
  while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) {
    RTC_DCHECK(submodules_.echo_detector);
    submodules_.echo_detector->AnalyzeRenderAudio(red_capture_queue_buffer_);
  }
}
1018
ProcessStream(const int16_t * const src,const StreamConfig & input_config,const StreamConfig & output_config,int16_t * const dest)1019 int AudioProcessingImpl::ProcessStream(const int16_t* const src,
1020 const StreamConfig& input_config,
1021 const StreamConfig& output_config,
1022 int16_t* const dest) {
1023 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
1024 RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
1025
1026 MutexLock lock_capture(&mutex_capture_);
1027
1028 if (aec_dump_) {
1029 RecordUnprocessedCaptureStream(src, input_config);
1030 }
1031
1032 capture_.capture_audio->CopyFrom(src, input_config);
1033 if (capture_.capture_fullband_audio) {
1034 capture_.capture_fullband_audio->CopyFrom(src, input_config);
1035 }
1036 RETURN_ON_ERR(ProcessCaptureStreamLocked());
1037 if (submodule_states_.CaptureMultiBandProcessingPresent() ||
1038 submodule_states_.CaptureFullBandProcessingActive()) {
1039 if (capture_.capture_fullband_audio) {
1040 capture_.capture_fullband_audio->CopyTo(output_config, dest);
1041 } else {
1042 capture_.capture_audio->CopyTo(output_config, dest);
1043 }
1044 }
1045
1046 if (aec_dump_) {
1047 RecordProcessedCaptureStream(dest, output_config);
1048 }
1049
1050 return kNoError;
1051 }
1052
// Runs the capture-side processing chain on capture_.capture_audio (and, when
// present, capture_.capture_fullband_audio) and updates capture_.stats.
//
// The callers in this file invoke it while holding the capture lock.
//
// Returns kNoError on success. Returns kStreamParameterNotSetError when the
// mobile echo controller is present and no stream delay has been set, and
// propagates errors reported by the gain controller and by the mobile echo
// controller.
int AudioProcessingImpl::ProcessCaptureStreamLocked() {
  // Consume the queued render audio and apply pending runtime settings before
  // the capture audio is processed.
  EmptyQueuedRenderAudioLocked();
  HandleCaptureRuntimeSettings();

  // Ensure that not both the AEC and AECM are active at the same time.
  // TODO(peah): Simplify once the public API Enable functions for these
  // are moved to APM.
  RTC_DCHECK_LE(
      !!submodules_.echo_controller + !!submodules_.echo_control_mobile, 1);

  AudioBuffer* capture_buffer = capture_.capture_audio.get();  // For brevity.
  AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get();

  // Full-band high-pass filtering, done before the band split. Skipped when
  // the split-band variant is configured or enforced (see further below).
  if (submodules_.high_pass_filter &&
      config_.high_pass_filter.apply_in_full_band &&
      !constants_.enforce_split_band_hpf) {
    submodules_.high_pass_filter->Process(capture_buffer,
                                          /*use_split_band_data=*/false);
  }

  if (submodules_.pre_amplifier) {
    submodules_.pre_amplifier->ApplyGain(AudioFrameView<float>(
        capture_buffer->channels(), capture_buffer->num_channels(),
        capture_buffer->num_frames()));
  }

  // Input level statistics are computed on the first channel only. The
  // histograms are reported once every 1000 calls.
  capture_input_rms_.Analyze(rtc::ArrayView<const float>(
      capture_buffer->channels_const()[0],
      capture_nonlocked_.capture_processing_format.num_frames()));
  const bool log_rms = ++capture_rms_interval_counter_ >= 1000;
  if (log_rms) {
    capture_rms_interval_counter_ = 0;
    RmsLevel::Levels levels = capture_input_rms_.AverageAndPeak();
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelAverageRms",
                                levels.average, 1, RmsLevel::kMinLevelDb, 64);
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelPeakRms",
                                levels.peak, 1, RmsLevel::kMinLevelDb, 64);
  }

  if (submodules_.echo_controller) {
    // Detect and flag any change in the analog gain.
    // NOTE(review): the `!= -1` test presumably skips the case where no
    // previous level has been recorded yet - confirm against the initial
    // value of prev_analog_mic_level.
    int analog_mic_level = recommended_stream_analog_level_locked();
    capture_.echo_path_gain_change =
        capture_.prev_analog_mic_level != analog_mic_level &&
        capture_.prev_analog_mic_level != -1;
    capture_.prev_analog_mic_level = analog_mic_level;

    // Detect and flag any change in the pre-amplifier gain.
    if (submodules_.pre_amplifier) {
      float pre_amp_gain = submodules_.pre_amplifier->GetGainFactor();
      capture_.echo_path_gain_change =
          capture_.echo_path_gain_change ||
          (capture_.prev_pre_amp_gain != pre_amp_gain &&
           capture_.prev_pre_amp_gain >= 0.f);
      capture_.prev_pre_amp_gain = pre_amp_gain;
    }

    // Detect volume change.
    capture_.echo_path_gain_change =
        capture_.echo_path_gain_change ||
        (capture_.prev_playout_volume != capture_.playout_volume &&
         capture_.prev_playout_volume >= 0);
    capture_.prev_playout_volume = capture_.playout_volume;

    submodules_.echo_controller->AnalyzeCapture(capture_buffer);
  }

  if (submodules_.agc_manager) {
    submodules_.agc_manager->AnalyzePreProcess(capture_buffer);
  }

  // Split into frequency bands when multi-band submodules are active and the
  // processing sample rate supports it.
  if (submodule_states_.CaptureMultiBandSubModulesActive() &&
      SampleRateSupportsMultiBand(
          capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
    capture_buffer->SplitIntoFrequencyBands();
  }

  const bool multi_channel_capture = config_.pipeline.multi_channel_capture &&
                                     constants_.multi_channel_capture_support;
  if (submodules_.echo_controller && !multi_channel_capture) {
    // Force down-mixing of the number of channels after the detection of
    // capture signal saturation.
    // TODO(peah): Look into ensuring that this kind of tampering with the
    // AudioBuffer functionality should not be needed.
    capture_buffer->set_num_channels(1);
  }

  // Split-band high-pass filtering; the complement of the full-band condition
  // used near the top of this function.
  if (submodules_.high_pass_filter &&
      (!config_.high_pass_filter.apply_in_full_band ||
       constants_.enforce_split_band_hpf)) {
    submodules_.high_pass_filter->Process(capture_buffer,
                                          /*use_split_band_data=*/true);
  }

  if (submodules_.gain_control) {
    RETURN_ON_ERR(
        submodules_.gain_control->AnalyzeCaptureAudio(*capture_buffer));
  }

  // The noise suppressor analyzes the capture signal here unless it is
  // configured to analyze the linear AEC output and that output is available
  // (that case is handled in the non-AECM branch below).
  if ((!config_.noise_suppression.analyze_linear_aec_output_when_available ||
       !linear_aec_buffer || submodules_.echo_control_mobile) &&
      submodules_.noise_suppressor) {
    submodules_.noise_suppressor->Analyze(*capture_buffer);
  }

  if (submodules_.echo_control_mobile) {
    // Ensure that the stream delay was set before the call to the
    // AECM ProcessCaptureAudio function.
    if (!capture_.was_stream_delay_set) {
      return AudioProcessing::kStreamParameterNotSetError;
    }

    // With the AECM, noise suppression runs before echo control.
    if (submodules_.noise_suppressor) {
      submodules_.noise_suppressor->Process(capture_buffer);
    }

    RETURN_ON_ERR(submodules_.echo_control_mobile->ProcessCaptureAudio(
        capture_buffer, stream_delay_ms()));
  } else {
    if (submodules_.echo_controller) {
      data_dumper_->DumpRaw("stream_delay", stream_delay_ms());

      // The delay is only forwarded when it has been set.
      if (capture_.was_stream_delay_set) {
        submodules_.echo_controller->SetAudioBufferDelay(stream_delay_ms());
      }

      submodules_.echo_controller->ProcessCapture(
          capture_buffer, linear_aec_buffer, capture_.echo_path_gain_change);
    }

    if (config_.noise_suppression.analyze_linear_aec_output_when_available &&
        linear_aec_buffer && submodules_.noise_suppressor) {
      submodules_.noise_suppressor->Analyze(*linear_aec_buffer);
    }

    // Without the AECM, noise suppression runs after echo control.
    if (submodules_.noise_suppressor) {
      submodules_.noise_suppressor->Process(capture_buffer);
    }
  }

  if (config_.voice_detection.enabled) {
    capture_.stats.voice_detected =
        submodules_.voice_detector->ProcessCaptureAudio(capture_buffer);
  } else {
    capture_.stats.voice_detected = absl::nullopt;
  }

  if (submodules_.agc_manager) {
    submodules_.agc_manager->Process(capture_buffer);

    // Forward a new digital compression gain, if any, to the gain controller.
    absl::optional<int> new_digital_gain =
        submodules_.agc_manager->GetDigitalComressionGain();
    if (new_digital_gain && submodules_.gain_control) {
      submodules_.gain_control->set_compression_gain_db(*new_digital_gain);
    }
  }

  if (submodules_.gain_control) {
    // TODO(peah): Add reporting from AEC3 whether there is echo.
    RETURN_ON_ERR(submodules_.gain_control->ProcessCaptureAudio(
        capture_buffer, /*stream_has_echo*/ false));
  }

  if (submodule_states_.CaptureMultiBandProcessingPresent() &&
      SampleRateSupportsMultiBand(
          capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
    capture_buffer->MergeFrequencyBands();
  }

  if (capture_.capture_fullband_audio) {
    const auto& ec = submodules_.echo_controller;
    bool ec_active = ec ? ec->ActiveProcessing() : false;
    // Only update the fullband buffer if the multiband processing has changed
    // the signal. Keep the original signal otherwise.
    if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
      capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
    }
    // From here on, all processing operates on the fullband buffer.
    capture_buffer = capture_.capture_fullband_audio.get();
  }

  if (config_.residual_echo_detector.enabled) {
    RTC_DCHECK(submodules_.echo_detector);
    submodules_.echo_detector->AnalyzeCaptureAudio(rtc::ArrayView<const float>(
        capture_buffer->channels()[0], capture_buffer->num_frames()));
  }

  // TODO(aluebs): Investigate if the transient suppression placement should be
  // before or after the AGC.
  if (submodules_.transient_suppressor) {
    // A voice probability of 1 is used when there is no AGC manager to
    // provide one.
    float voice_probability = submodules_.agc_manager.get()
                                  ? submodules_.agc_manager->voice_probability()
                                  : 1.f;

    submodules_.transient_suppressor->Suppress(
        capture_buffer->channels()[0], capture_buffer->num_frames(),
        capture_buffer->num_channels(),
        capture_buffer->split_bands_const(0)[kBand0To8kHz],
        capture_buffer->num_frames_per_band(),
        capture_.keyboard_info.keyboard_data,
        capture_.keyboard_info.num_keyboard_frames, voice_probability,
        capture_.key_pressed);
  }

  // Experimental APM sub-module that analyzes |capture_buffer|.
  if (submodules_.capture_analyzer) {
    submodules_.capture_analyzer->Analyze(capture_buffer);
  }

  if (submodules_.gain_controller2) {
    submodules_.gain_controller2->NotifyAnalogLevel(
        recommended_stream_analog_level_locked());
    submodules_.gain_controller2->Process(capture_buffer);
  }

  if (submodules_.capture_post_processor) {
    submodules_.capture_post_processor->Process(capture_buffer);
  }

  // The level estimator operates on the recombined data.
  if (config_.level_estimation.enabled) {
    submodules_.output_level_estimator->ProcessStream(*capture_buffer);
    capture_.stats.output_rms_dbfs = submodules_.output_level_estimator->RMS();
  } else {
    capture_.stats.output_rms_dbfs = absl::nullopt;
  }

  // Output level statistics, reported on the same calls as the input level
  // statistics above.
  capture_output_rms_.Analyze(rtc::ArrayView<const float>(
      capture_buffer->channels_const()[0],
      capture_nonlocked_.capture_processing_format.num_frames()));
  if (log_rms) {
    RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelAverageRms",
                                levels.average, 1, RmsLevel::kMinLevelDb, 64);
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
                                levels.peak, 1, RmsLevel::kMinLevelDb, 64);
  }

  if (submodules_.agc_manager) {
    int level = recommended_stream_analog_level_locked();
    data_dumper_->DumpRaw("experimental_gain_control_stream_analog_level", 1,
                          &level);
  }

  // Compute echo-related stats.
  if (submodules_.echo_controller) {
    auto ec_metrics = submodules_.echo_controller->GetMetrics();
    capture_.stats.echo_return_loss = ec_metrics.echo_return_loss;
    capture_.stats.echo_return_loss_enhancement =
        ec_metrics.echo_return_loss_enhancement;
    capture_.stats.delay_ms = ec_metrics.delay_ms;
  }
  if (config_.residual_echo_detector.enabled) {
    RTC_DCHECK(submodules_.echo_detector);
    auto ed_metrics = submodules_.echo_detector->GetMetrics();
    capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
    capture_.stats.residual_echo_likelihood_recent_max =
        ed_metrics.echo_likelihood_recent_max;
  }

  // Pass stats for reporting.
  stats_reporter_.UpdateStatistics(capture_.stats);

  // Clear the flag so that set_stream_delay_ms() has to be called again for
  // the delay to count as set in the next call.
  capture_.was_stream_delay_set = false;
  return kNoError;
}
1318
// Analyzes the floating point render audio in |data| without producing any
// output. Takes the render lock and uses |reverse_config| as both the reverse
// input and the reverse output configuration. Returns the result of
// AnalyzeReverseStreamLocked().
int AudioProcessingImpl::AnalyzeReverseStream(
    const float* const* data,
    const StreamConfig& reverse_config) {
  TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_StreamConfig");
  MutexLock lock(&mutex_render_);
  return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config);
}
1326
ProcessReverseStream(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config,float * const * dest)1327 int AudioProcessingImpl::ProcessReverseStream(const float* const* src,
1328 const StreamConfig& input_config,
1329 const StreamConfig& output_config,
1330 float* const* dest) {
1331 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
1332 MutexLock lock(&mutex_render_);
1333 RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config));
1334 if (submodule_states_.RenderMultiBandProcessingActive() ||
1335 submodule_states_.RenderFullBandProcessingActive()) {
1336 render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
1337 dest);
1338 } else if (formats_.api_format.reverse_input_stream() !=
1339 formats_.api_format.reverse_output_stream()) {
1340 render_.render_converter->Convert(src, input_config.num_samples(), dest,
1341 output_config.num_samples());
1342 } else {
1343 CopyAudioIfNeeded(src, input_config.num_frames(),
1344 input_config.num_channels(), dest);
1345 }
1346
1347 return kNoError;
1348 }
1349
AnalyzeReverseStreamLocked(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config)1350 int AudioProcessingImpl::AnalyzeReverseStreamLocked(
1351 const float* const* src,
1352 const StreamConfig& input_config,
1353 const StreamConfig& output_config) {
1354 if (src == nullptr) {
1355 return kNullPointerError;
1356 }
1357
1358 if (input_config.num_channels() == 0) {
1359 return kBadNumberChannelsError;
1360 }
1361
1362 ProcessingConfig processing_config = formats_.api_format;
1363 processing_config.reverse_input_stream() = input_config;
1364 processing_config.reverse_output_stream() = output_config;
1365
1366 RETURN_ON_ERR(MaybeInitializeRender(processing_config));
1367 RTC_DCHECK_EQ(input_config.num_frames(),
1368 formats_.api_format.reverse_input_stream().num_frames());
1369
1370 if (aec_dump_) {
1371 const size_t channel_size =
1372 formats_.api_format.reverse_input_stream().num_frames();
1373 const size_t num_channels =
1374 formats_.api_format.reverse_input_stream().num_channels();
1375 aec_dump_->WriteRenderStreamMessage(
1376 AudioFrameView<const float>(src, num_channels, channel_size));
1377 }
1378 render_.render_audio->CopyFrom(src,
1379 formats_.api_format.reverse_input_stream());
1380 return ProcessRenderStreamLocked();
1381 }
1382
ProcessReverseStream(const int16_t * const src,const StreamConfig & input_config,const StreamConfig & output_config,int16_t * const dest)1383 int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
1384 const StreamConfig& input_config,
1385 const StreamConfig& output_config,
1386 int16_t* const dest) {
1387 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
1388
1389 if (input_config.num_channels() <= 0) {
1390 return AudioProcessing::Error::kBadNumberChannelsError;
1391 }
1392
1393 MutexLock lock(&mutex_render_);
1394 ProcessingConfig processing_config = formats_.api_format;
1395 processing_config.reverse_input_stream().set_sample_rate_hz(
1396 input_config.sample_rate_hz());
1397 processing_config.reverse_input_stream().set_num_channels(
1398 input_config.num_channels());
1399 processing_config.reverse_output_stream().set_sample_rate_hz(
1400 output_config.sample_rate_hz());
1401 processing_config.reverse_output_stream().set_num_channels(
1402 output_config.num_channels());
1403
1404 RETURN_ON_ERR(MaybeInitializeRender(processing_config));
1405 if (input_config.num_frames() !=
1406 formats_.api_format.reverse_input_stream().num_frames()) {
1407 return kBadDataLengthError;
1408 }
1409
1410 if (aec_dump_) {
1411 aec_dump_->WriteRenderStreamMessage(src, input_config.num_frames(),
1412 input_config.num_channels());
1413 }
1414
1415 render_.render_audio->CopyFrom(src, input_config);
1416 RETURN_ON_ERR(ProcessRenderStreamLocked());
1417 if (submodule_states_.RenderMultiBandProcessingActive() ||
1418 submodule_states_.RenderFullBandProcessingActive()) {
1419 render_.render_audio->CopyTo(output_config, dest);
1420 }
1421 return kNoError;
1422 }
1423
ProcessRenderStreamLocked()1424 int AudioProcessingImpl::ProcessRenderStreamLocked() {
1425 AudioBuffer* render_buffer = render_.render_audio.get(); // For brevity.
1426
1427 HandleRenderRuntimeSettings();
1428
1429 if (submodules_.render_pre_processor) {
1430 submodules_.render_pre_processor->Process(render_buffer);
1431 }
1432
1433 QueueNonbandedRenderAudio(render_buffer);
1434
1435 if (submodule_states_.RenderMultiBandSubModulesActive() &&
1436 SampleRateSupportsMultiBand(
1437 formats_.render_processing_format.sample_rate_hz())) {
1438 render_buffer->SplitIntoFrequencyBands();
1439 }
1440
1441 if (submodule_states_.RenderMultiBandSubModulesActive()) {
1442 QueueBandedRenderAudio(render_buffer);
1443 }
1444
1445 // TODO(peah): Perform the queuing inside QueueRenderAudiuo().
1446 if (submodules_.echo_controller) {
1447 submodules_.echo_controller->AnalyzeRender(render_buffer);
1448 }
1449
1450 if (submodule_states_.RenderMultiBandProcessingActive() &&
1451 SampleRateSupportsMultiBand(
1452 formats_.render_processing_format.sample_rate_hz())) {
1453 render_buffer->MergeFrequencyBands();
1454 }
1455
1456 return kNoError;
1457 }
1458
set_stream_delay_ms(int delay)1459 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
1460 MutexLock lock(&mutex_capture_);
1461 Error retval = kNoError;
1462 capture_.was_stream_delay_set = true;
1463
1464 if (delay < 0) {
1465 delay = 0;
1466 retval = kBadStreamParameterWarning;
1467 }
1468
1469 // TODO(ajm): the max is rather arbitrarily chosen; investigate.
1470 if (delay > 500) {
1471 delay = 500;
1472 retval = kBadStreamParameterWarning;
1473 }
1474
1475 capture_nonlocked_.stream_delay_ms = delay;
1476 return retval;
1477 }
1478
GetLinearAecOutput(rtc::ArrayView<std::array<float,160>> linear_output) const1479 bool AudioProcessingImpl::GetLinearAecOutput(
1480 rtc::ArrayView<std::array<float, 160>> linear_output) const {
1481 MutexLock lock(&mutex_capture_);
1482 AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get();
1483
1484 RTC_DCHECK(linear_aec_buffer);
1485 if (linear_aec_buffer) {
1486 RTC_DCHECK_EQ(1, linear_aec_buffer->num_bands());
1487 RTC_DCHECK_EQ(linear_output.size(), linear_aec_buffer->num_channels());
1488
1489 for (size_t ch = 0; ch < linear_aec_buffer->num_channels(); ++ch) {
1490 RTC_DCHECK_EQ(linear_output[ch].size(), linear_aec_buffer->num_frames());
1491 rtc::ArrayView<const float> channel_view =
1492 rtc::ArrayView<const float>(linear_aec_buffer->channels_const()[ch],
1493 linear_aec_buffer->num_frames());
1494 std::copy(channel_view.begin(), channel_view.end(),
1495 linear_output[ch].begin());
1496 }
1497 return true;
1498 }
1499 RTC_LOG(LS_ERROR) << "No linear AEC output available";
1500 RTC_NOTREACHED();
1501 return false;
1502 }
1503
// Returns the stream delay most recently stored by set_stream_delay_ms().
int AudioProcessingImpl::stream_delay_ms() const {
  // Used as callback from submodules, hence locking is not allowed.
  return capture_nonlocked_.stream_delay_ms;
}
1508
// Records, under the capture lock, whether a key press was reported. The
// value is later passed to the transient suppressor by the capture
// processing.
void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
  MutexLock lock(&mutex_capture_);
  capture_.key_pressed = key_pressed;
}
1513
set_stream_analog_level(int level)1514 void AudioProcessingImpl::set_stream_analog_level(int level) {
1515 MutexLock lock_capture(&mutex_capture_);
1516
1517 if (submodules_.agc_manager) {
1518 submodules_.agc_manager->set_stream_analog_level(level);
1519 data_dumper_->DumpRaw("experimental_gain_control_set_stream_analog_level",
1520 1, &level);
1521 } else if (submodules_.gain_control) {
1522 int error = submodules_.gain_control->set_stream_analog_level(level);
1523 RTC_DCHECK_EQ(kNoError, error);
1524 } else {
1525 capture_.cached_stream_analog_level_ = level;
1526 }
1527 }
1528
// Returns the recommended analog level. Acquires the capture lock and
// delegates to recommended_stream_analog_level_locked().
int AudioProcessingImpl::recommended_stream_analog_level() const {
  MutexLock lock_capture(&mutex_capture_);
  return recommended_stream_analog_level_locked();
}
1533
recommended_stream_analog_level_locked() const1534 int AudioProcessingImpl::recommended_stream_analog_level_locked() const {
1535 if (submodules_.agc_manager) {
1536 return submodules_.agc_manager->stream_analog_level();
1537 } else if (submodules_.gain_control) {
1538 return submodules_.gain_control->stream_analog_level();
1539 } else {
1540 return capture_.cached_stream_analog_level_;
1541 }
1542 }
1543
CreateAndAttachAecDump(const std::string & file_name,int64_t max_log_size_bytes,rtc::TaskQueue * worker_queue)1544 bool AudioProcessingImpl::CreateAndAttachAecDump(const std::string& file_name,
1545 int64_t max_log_size_bytes,
1546 rtc::TaskQueue* worker_queue) {
1547 std::unique_ptr<AecDump> aec_dump =
1548 AecDumpFactory::Create(file_name, max_log_size_bytes, worker_queue);
1549 if (!aec_dump) {
1550 return false;
1551 }
1552
1553 AttachAecDump(std::move(aec_dump));
1554 return true;
1555 }
1556
CreateAndAttachAecDump(FILE * handle,int64_t max_log_size_bytes,rtc::TaskQueue * worker_queue)1557 bool AudioProcessingImpl::CreateAndAttachAecDump(FILE* handle,
1558 int64_t max_log_size_bytes,
1559 rtc::TaskQueue* worker_queue) {
1560 std::unique_ptr<AecDump> aec_dump =
1561 AecDumpFactory::Create(handle, max_log_size_bytes, worker_queue);
1562 if (!aec_dump) {
1563 return false;
1564 }
1565
1566 AttachAecDump(std::move(aec_dump));
1567 return true;
1568 }
1569
AttachAecDump(std::unique_ptr<AecDump> aec_dump)1570 void AudioProcessingImpl::AttachAecDump(std::unique_ptr<AecDump> aec_dump) {
1571 RTC_DCHECK(aec_dump);
1572 MutexLock lock_render(&mutex_render_);
1573 MutexLock lock_capture(&mutex_capture_);
1574
1575 // The previously attached AecDump will be destroyed with the
1576 // 'aec_dump' parameter, which is after locks are released.
1577 aec_dump_.swap(aec_dump);
1578 WriteAecDumpConfigMessage(true);
1579 aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis());
1580 }
1581
DetachAecDump()1582 void AudioProcessingImpl::DetachAecDump() {
1583 // The d-tor of a task-queue based AecDump blocks until all pending
1584 // tasks are done. This construction avoids blocking while holding
1585 // the render and capture locks.
1586 std::unique_ptr<AecDump> aec_dump = nullptr;
1587 {
1588 MutexLock lock_render(&mutex_render_);
1589 MutexLock lock_capture(&mutex_capture_);
1590 aec_dump = std::move(aec_dump_);
1591 }
1592 }
1593
MutateConfig(rtc::FunctionView<void (AudioProcessing::Config *)> mutator)1594 void AudioProcessingImpl::MutateConfig(
1595 rtc::FunctionView<void(AudioProcessing::Config*)> mutator) {
1596 MutexLock lock_render(&mutex_render_);
1597 MutexLock lock_capture(&mutex_capture_);
1598 mutator(&config_);
1599 ApplyConfig(config_);
1600 }
1601
GetConfig() const1602 AudioProcessing::Config AudioProcessingImpl::GetConfig() const {
1603 MutexLock lock_render(&mutex_render_);
1604 MutexLock lock_capture(&mutex_capture_);
1605 return config_;
1606 }
1607
UpdateActiveSubmoduleStates()1608 bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
1609 return submodule_states_.Update(
1610 config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile,
1611 config_.residual_echo_detector.enabled, !!submodules_.noise_suppressor,
1612 !!submodules_.gain_control, !!submodules_.gain_controller2,
1613 config_.pre_amplifier.enabled, capture_nonlocked_.echo_controller_enabled,
1614 config_.voice_detection.enabled, !!submodules_.transient_suppressor);
1615 }
1616
InitializeTransientSuppressor()1617 void AudioProcessingImpl::InitializeTransientSuppressor() {
1618 if (config_.transient_suppression.enabled) {
1619 // Attempt to create a transient suppressor, if one is not already created.
1620 if (!submodules_.transient_suppressor) {
1621 submodules_.transient_suppressor =
1622 CreateTransientSuppressor(submodule_creation_overrides_);
1623 }
1624 if (submodules_.transient_suppressor) {
1625 submodules_.transient_suppressor->Initialize(
1626 proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate,
1627 num_proc_channels());
1628 } else {
1629 RTC_LOG(LS_WARNING)
1630 << "No transient suppressor created (probably disabled)";
1631 }
1632 } else {
1633 submodules_.transient_suppressor.reset();
1634 }
1635 }
1636
InitializeHighPassFilter(bool forced_reset)1637 void AudioProcessingImpl::InitializeHighPassFilter(bool forced_reset) {
1638 bool high_pass_filter_needed_by_aec =
1639 config_.echo_canceller.enabled &&
1640 config_.echo_canceller.enforce_high_pass_filtering &&
1641 !config_.echo_canceller.mobile_mode;
1642 if (submodule_states_.HighPassFilteringRequired() ||
1643 high_pass_filter_needed_by_aec) {
1644 bool use_full_band = config_.high_pass_filter.apply_in_full_band &&
1645 !constants_.enforce_split_band_hpf;
1646 int rate = use_full_band ? proc_fullband_sample_rate_hz()
1647 : proc_split_sample_rate_hz();
1648 size_t num_channels =
1649 use_full_band ? num_output_channels() : num_proc_channels();
1650
1651 if (!submodules_.high_pass_filter ||
1652 rate != submodules_.high_pass_filter->sample_rate_hz() ||
1653 forced_reset ||
1654 num_channels != submodules_.high_pass_filter->num_channels()) {
1655 submodules_.high_pass_filter.reset(
1656 new HighPassFilter(rate, num_channels));
1657 }
1658 } else {
1659 submodules_.high_pass_filter.reset();
1660 }
1661 }
1662
InitializeVoiceDetector()1663 void AudioProcessingImpl::InitializeVoiceDetector() {
1664 if (config_.voice_detection.enabled) {
1665 submodules_.voice_detector = std::make_unique<VoiceDetection>(
1666 proc_split_sample_rate_hz(), VoiceDetection::kVeryLowLikelihood);
1667 } else {
1668 submodules_.voice_detector.reset();
1669 }
1670 }
InitializeEchoController()1671 void AudioProcessingImpl::InitializeEchoController() {
1672 bool use_echo_controller =
1673 echo_control_factory_ ||
1674 (config_.echo_canceller.enabled && !config_.echo_canceller.mobile_mode);
1675
1676 if (use_echo_controller) {
1677 // Create and activate the echo controller.
1678 if (echo_control_factory_) {
1679 submodules_.echo_controller = echo_control_factory_->Create(
1680 proc_sample_rate_hz(), num_reverse_channels(), num_proc_channels());
1681 RTC_DCHECK(submodules_.echo_controller);
1682 } else {
1683 EchoCanceller3Config config =
1684 use_setup_specific_default_aec3_config_
1685 ? EchoCanceller3::CreateDefaultConfig(num_reverse_channels(),
1686 num_proc_channels())
1687 : EchoCanceller3Config();
1688 submodules_.echo_controller = std::make_unique<EchoCanceller3>(
1689 config, proc_sample_rate_hz(), num_reverse_channels(),
1690 num_proc_channels());
1691 }
1692
1693 // Setup the storage for returning the linear AEC output.
1694 if (config_.echo_canceller.export_linear_aec_output) {
1695 constexpr int kLinearOutputRateHz = 16000;
1696 capture_.linear_aec_output = std::make_unique<AudioBuffer>(
1697 kLinearOutputRateHz, num_proc_channels(), kLinearOutputRateHz,
1698 num_proc_channels(), kLinearOutputRateHz, num_proc_channels());
1699 } else {
1700 capture_.linear_aec_output.reset();
1701 }
1702
1703 capture_nonlocked_.echo_controller_enabled = true;
1704
1705 submodules_.echo_control_mobile.reset();
1706 aecm_render_signal_queue_.reset();
1707 return;
1708 }
1709
1710 submodules_.echo_controller.reset();
1711 capture_nonlocked_.echo_controller_enabled = false;
1712 capture_.linear_aec_output.reset();
1713
1714 if (!config_.echo_canceller.enabled) {
1715 submodules_.echo_control_mobile.reset();
1716 aecm_render_signal_queue_.reset();
1717 return;
1718 }
1719
1720 if (config_.echo_canceller.mobile_mode) {
1721 // Create and activate AECM.
1722 size_t max_element_size =
1723 std::max(static_cast<size_t>(1),
1724 kMaxAllowedValuesOfSamplesPerBand *
1725 EchoControlMobileImpl::NumCancellersRequired(
1726 num_output_channels(), num_reverse_channels()));
1727
1728 std::vector<int16_t> template_queue_element(max_element_size);
1729
1730 aecm_render_signal_queue_.reset(
1731 new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
1732 kMaxNumFramesToBuffer, template_queue_element,
1733 RenderQueueItemVerifier<int16_t>(max_element_size)));
1734
1735 aecm_render_queue_buffer_.resize(max_element_size);
1736 aecm_capture_queue_buffer_.resize(max_element_size);
1737
1738 submodules_.echo_control_mobile.reset(new EchoControlMobileImpl());
1739
1740 submodules_.echo_control_mobile->Initialize(proc_split_sample_rate_hz(),
1741 num_reverse_channels(),
1742 num_output_channels());
1743 return;
1744 }
1745
1746 submodules_.echo_control_mobile.reset();
1747 aecm_render_signal_queue_.reset();
1748 }
1749
InitializeGainController1()1750 void AudioProcessingImpl::InitializeGainController1() {
1751 if (!config_.gain_controller1.enabled) {
1752 submodules_.agc_manager.reset();
1753 submodules_.gain_control.reset();
1754 return;
1755 }
1756
1757 if (!submodules_.gain_control) {
1758 submodules_.gain_control.reset(new GainControlImpl());
1759 }
1760
1761 submodules_.gain_control->Initialize(num_proc_channels(),
1762 proc_sample_rate_hz());
1763
1764 if (!config_.gain_controller1.analog_gain_controller.enabled) {
1765 int error = submodules_.gain_control->set_mode(
1766 Agc1ConfigModeToInterfaceMode(config_.gain_controller1.mode));
1767 RTC_DCHECK_EQ(kNoError, error);
1768 error = submodules_.gain_control->set_target_level_dbfs(
1769 config_.gain_controller1.target_level_dbfs);
1770 RTC_DCHECK_EQ(kNoError, error);
1771 error = submodules_.gain_control->set_compression_gain_db(
1772 config_.gain_controller1.compression_gain_db);
1773 RTC_DCHECK_EQ(kNoError, error);
1774 error = submodules_.gain_control->enable_limiter(
1775 config_.gain_controller1.enable_limiter);
1776 RTC_DCHECK_EQ(kNoError, error);
1777 error = submodules_.gain_control->set_analog_level_limits(
1778 config_.gain_controller1.analog_level_minimum,
1779 config_.gain_controller1.analog_level_maximum);
1780 RTC_DCHECK_EQ(kNoError, error);
1781
1782 submodules_.agc_manager.reset();
1783 return;
1784 }
1785
1786 if (!submodules_.agc_manager.get() ||
1787 submodules_.agc_manager->num_channels() !=
1788 static_cast<int>(num_proc_channels()) ||
1789 submodules_.agc_manager->sample_rate_hz() !=
1790 capture_nonlocked_.split_rate) {
1791 int stream_analog_level = -1;
1792 const bool re_creation = !!submodules_.agc_manager;
1793 if (re_creation) {
1794 stream_analog_level = submodules_.agc_manager->stream_analog_level();
1795 }
1796 submodules_.agc_manager.reset(new AgcManagerDirect(
1797 num_proc_channels(),
1798 config_.gain_controller1.analog_gain_controller.startup_min_volume,
1799 config_.gain_controller1.analog_gain_controller.clipped_level_min,
1800 config_.gain_controller1.analog_gain_controller
1801 .enable_agc2_level_estimator,
1802 !config_.gain_controller1.analog_gain_controller
1803 .enable_digital_adaptive,
1804 capture_nonlocked_.split_rate));
1805 if (re_creation) {
1806 submodules_.agc_manager->set_stream_analog_level(stream_analog_level);
1807 }
1808 }
1809 submodules_.agc_manager->Initialize();
1810 submodules_.agc_manager->SetupDigitalGainControl(
1811 submodules_.gain_control.get());
1812 submodules_.agc_manager->SetCaptureMuted(capture_.output_will_be_muted);
1813 }
1814
InitializeGainController2()1815 void AudioProcessingImpl::InitializeGainController2() {
1816 if (config_.gain_controller2.enabled) {
1817 if (!submodules_.gain_controller2) {
1818 // TODO(alessiob): Move the injected gain controller once injection is
1819 // implemented.
1820 submodules_.gain_controller2.reset(new GainController2());
1821 }
1822
1823 submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz());
1824 submodules_.gain_controller2->ApplyConfig(config_.gain_controller2);
1825 } else {
1826 submodules_.gain_controller2.reset();
1827 }
1828 }
1829
InitializeNoiseSuppressor()1830 void AudioProcessingImpl::InitializeNoiseSuppressor() {
1831 submodules_.noise_suppressor.reset();
1832
1833 if (config_.noise_suppression.enabled) {
1834 auto map_level =
1835 [](AudioProcessing::Config::NoiseSuppression::Level level) {
1836 using NoiseSuppresionConfig =
1837 AudioProcessing::Config::NoiseSuppression;
1838 switch (level) {
1839 case NoiseSuppresionConfig::kLow:
1840 return NsConfig::SuppressionLevel::k6dB;
1841 case NoiseSuppresionConfig::kModerate:
1842 return NsConfig::SuppressionLevel::k12dB;
1843 case NoiseSuppresionConfig::kHigh:
1844 return NsConfig::SuppressionLevel::k18dB;
1845 case NoiseSuppresionConfig::kVeryHigh:
1846 return NsConfig::SuppressionLevel::k21dB;
1847 default:
1848 RTC_NOTREACHED();
1849 }
1850 };
1851
1852 NsConfig cfg;
1853 cfg.target_level = map_level(config_.noise_suppression.level);
1854 submodules_.noise_suppressor = std::make_unique<NoiseSuppressor>(
1855 cfg, proc_sample_rate_hz(), num_proc_channels());
1856 }
1857 }
1858
InitializePreAmplifier()1859 void AudioProcessingImpl::InitializePreAmplifier() {
1860 if (config_.pre_amplifier.enabled) {
1861 submodules_.pre_amplifier.reset(
1862 new GainApplier(true, config_.pre_amplifier.fixed_gain_factor));
1863 } else {
1864 submodules_.pre_amplifier.reset();
1865 }
1866 }
1867
InitializeResidualEchoDetector()1868 void AudioProcessingImpl::InitializeResidualEchoDetector() {
1869 RTC_DCHECK(submodules_.echo_detector);
1870 submodules_.echo_detector->Initialize(
1871 proc_fullband_sample_rate_hz(), 1,
1872 formats_.render_processing_format.sample_rate_hz(), 1);
1873 }
1874
InitializeAnalyzer()1875 void AudioProcessingImpl::InitializeAnalyzer() {
1876 if (submodules_.capture_analyzer) {
1877 submodules_.capture_analyzer->Initialize(proc_fullband_sample_rate_hz(),
1878 num_proc_channels());
1879 }
1880 }
1881
InitializePostProcessor()1882 void AudioProcessingImpl::InitializePostProcessor() {
1883 if (submodules_.capture_post_processor) {
1884 submodules_.capture_post_processor->Initialize(
1885 proc_fullband_sample_rate_hz(), num_proc_channels());
1886 }
1887 }
1888
InitializePreProcessor()1889 void AudioProcessingImpl::InitializePreProcessor() {
1890 if (submodules_.render_pre_processor) {
1891 submodules_.render_pre_processor->Initialize(
1892 formats_.render_processing_format.sample_rate_hz(),
1893 formats_.render_processing_format.num_channels());
1894 }
1895 }
1896
WriteAecDumpConfigMessage(bool forced)1897 void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) {
1898 if (!aec_dump_) {
1899 return;
1900 }
1901
1902 std::string experiments_description = "";
1903 // TODO(peah): Add semicolon-separated concatenations of experiment
1904 // descriptions for other submodules.
1905 if (config_.gain_controller1.analog_gain_controller.clipped_level_min !=
1906 kClippedLevelMin) {
1907 experiments_description += "AgcClippingLevelExperiment;";
1908 }
1909 if (!!submodules_.capture_post_processor) {
1910 experiments_description += "CapturePostProcessor;";
1911 }
1912 if (!!submodules_.render_pre_processor) {
1913 experiments_description += "RenderPreProcessor;";
1914 }
1915 if (capture_nonlocked_.echo_controller_enabled) {
1916 experiments_description += "EchoController;";
1917 }
1918 if (config_.gain_controller2.enabled) {
1919 experiments_description += "GainController2;";
1920 }
1921
1922 InternalAPMConfig apm_config;
1923
1924 apm_config.aec_enabled = config_.echo_canceller.enabled;
1925 apm_config.aec_delay_agnostic_enabled = false;
1926 apm_config.aec_extended_filter_enabled = false;
1927 apm_config.aec_suppression_level = 0;
1928
1929 apm_config.aecm_enabled = !!submodules_.echo_control_mobile;
1930 apm_config.aecm_comfort_noise_enabled =
1931 submodules_.echo_control_mobile &&
1932 submodules_.echo_control_mobile->is_comfort_noise_enabled();
1933 apm_config.aecm_routing_mode =
1934 submodules_.echo_control_mobile
1935 ? static_cast<int>(submodules_.echo_control_mobile->routing_mode())
1936 : 0;
1937
1938 apm_config.agc_enabled = !!submodules_.gain_control;
1939
1940 apm_config.agc_mode = submodules_.gain_control
1941 ? static_cast<int>(submodules_.gain_control->mode())
1942 : GainControl::kAdaptiveAnalog;
1943 apm_config.agc_limiter_enabled =
1944 submodules_.gain_control ? submodules_.gain_control->is_limiter_enabled()
1945 : false;
1946 apm_config.noise_robust_agc_enabled = !!submodules_.agc_manager;
1947
1948 apm_config.hpf_enabled = config_.high_pass_filter.enabled;
1949
1950 apm_config.ns_enabled = config_.noise_suppression.enabled;
1951 apm_config.ns_level = static_cast<int>(config_.noise_suppression.level);
1952
1953 apm_config.transient_suppression_enabled =
1954 config_.transient_suppression.enabled;
1955 apm_config.experiments_description = experiments_description;
1956 apm_config.pre_amplifier_enabled = config_.pre_amplifier.enabled;
1957 apm_config.pre_amplifier_fixed_gain_factor =
1958 config_.pre_amplifier.fixed_gain_factor;
1959
1960 if (!forced && apm_config == apm_config_for_aec_dump_) {
1961 return;
1962 }
1963 aec_dump_->WriteConfig(apm_config);
1964 apm_config_for_aec_dump_ = apm_config;
1965 }
1966
RecordUnprocessedCaptureStream(const float * const * src)1967 void AudioProcessingImpl::RecordUnprocessedCaptureStream(
1968 const float* const* src) {
1969 RTC_DCHECK(aec_dump_);
1970 WriteAecDumpConfigMessage(false);
1971
1972 const size_t channel_size = formats_.api_format.input_stream().num_frames();
1973 const size_t num_channels = formats_.api_format.input_stream().num_channels();
1974 aec_dump_->AddCaptureStreamInput(
1975 AudioFrameView<const float>(src, num_channels, channel_size));
1976 RecordAudioProcessingState();
1977 }
1978
RecordUnprocessedCaptureStream(const int16_t * const data,const StreamConfig & config)1979 void AudioProcessingImpl::RecordUnprocessedCaptureStream(
1980 const int16_t* const data,
1981 const StreamConfig& config) {
1982 RTC_DCHECK(aec_dump_);
1983 WriteAecDumpConfigMessage(false);
1984
1985 aec_dump_->AddCaptureStreamInput(data, config.num_channels(),
1986 config.num_frames());
1987 RecordAudioProcessingState();
1988 }
1989
RecordProcessedCaptureStream(const float * const * processed_capture_stream)1990 void AudioProcessingImpl::RecordProcessedCaptureStream(
1991 const float* const* processed_capture_stream) {
1992 RTC_DCHECK(aec_dump_);
1993
1994 const size_t channel_size = formats_.api_format.output_stream().num_frames();
1995 const size_t num_channels =
1996 formats_.api_format.output_stream().num_channels();
1997 aec_dump_->AddCaptureStreamOutput(AudioFrameView<const float>(
1998 processed_capture_stream, num_channels, channel_size));
1999 aec_dump_->WriteCaptureStreamMessage();
2000 }
2001
RecordProcessedCaptureStream(const int16_t * const data,const StreamConfig & config)2002 void AudioProcessingImpl::RecordProcessedCaptureStream(
2003 const int16_t* const data,
2004 const StreamConfig& config) {
2005 RTC_DCHECK(aec_dump_);
2006
2007 aec_dump_->AddCaptureStreamOutput(data, config.num_channels(),
2008 config.num_frames());
2009 aec_dump_->WriteCaptureStreamMessage();
2010 }
2011
RecordAudioProcessingState()2012 void AudioProcessingImpl::RecordAudioProcessingState() {
2013 RTC_DCHECK(aec_dump_);
2014 AecDump::AudioProcessingState audio_proc_state;
2015 audio_proc_state.delay = capture_nonlocked_.stream_delay_ms;
2016 audio_proc_state.drift = 0;
2017 audio_proc_state.level = recommended_stream_analog_level_locked();
2018 audio_proc_state.keypress = capture_.key_pressed;
2019 aec_dump_->AddAudioProcessingState(audio_proc_state);
2020 }
2021
// Sets the initial capture-side state: every boolean flag starts as false,
// the capture processing format and the split rate both start from
// kSampleRate16kHz, and the previous-level/gain and playout-volume members
// start at -1.
// NOTE(review): -1 presumably marks "no value observed yet" — confirm against
// the code that reads these members.
AudioProcessingImpl::ApmCaptureState::ApmCaptureState()
    : was_stream_delay_set(false),
      output_will_be_muted(false),
      key_pressed(false),
      capture_processing_format(kSampleRate16kHz),
      split_rate(kSampleRate16kHz),
      echo_path_gain_change(false),
      prev_analog_mic_level(-1),
      prev_pre_amp_gain(-1.f),
      playout_volume(-1),
      prev_playout_volume(-1) {}
2033
// Compiler-generated destructor, defined out of line in this file.
AudioProcessingImpl::ApmCaptureState::~ApmCaptureState() = default;
2035
Extract(const float * const * data,const StreamConfig & stream_config)2036 void AudioProcessingImpl::ApmCaptureState::KeyboardInfo::Extract(
2037 const float* const* data,
2038 const StreamConfig& stream_config) {
2039 if (stream_config.has_keyboard()) {
2040 keyboard_data = data[stream_config.num_channels()];
2041 } else {
2042 keyboard_data = NULL;
2043 }
2044 num_keyboard_frames = stream_config.num_frames();
2045 }
2046
// Compiler-generated default constructor, defined out of line in this file.
AudioProcessingImpl::ApmRenderState::ApmRenderState() = default;
2048
// Compiler-generated destructor, defined out of line in this file.
AudioProcessingImpl::ApmRenderState::~ApmRenderState() = default;
2050
// Constructs the stats message queue with the argument 1.
// NOTE(review): presumably this is the queue capacity, i.e. at most one
// pending stats update is buffered — confirm against the queue type.
AudioProcessingImpl::ApmStatsReporter::ApmStatsReporter()
    : stats_message_queue_(1) {}
2053
// Compiler-generated destructor, defined out of line in this file.
AudioProcessingImpl::ApmStatsReporter::~ApmStatsReporter() = default;
2055
GetStatistics()2056 AudioProcessingStats AudioProcessingImpl::ApmStatsReporter::GetStatistics() {
2057 MutexLock lock_stats(&mutex_stats_);
2058 bool new_stats_available = stats_message_queue_.Remove(&cached_stats_);
2059 // If the message queue is full, return the cached stats.
2060 static_cast<void>(new_stats_available);
2061
2062 return cached_stats_;
2063 }
2064
UpdateStatistics(const AudioProcessingStats & new_stats)2065 void AudioProcessingImpl::ApmStatsReporter::UpdateStatistics(
2066 const AudioProcessingStats& new_stats) {
2067 AudioProcessingStats stats_to_queue = new_stats;
2068 bool stats_message_passed = stats_message_queue_.Insert(&stats_to_queue);
2069 // If the message queue is full, discard the new stats.
2070 static_cast<void>(stats_message_passed);
2071 }
2072
2073 } // namespace webrtc
2074