1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/audio_processing_impl.h"
12
13 #include <algorithm>
14 #include <cstdint>
15 #include <cstring>
16 #include <memory>
17 #include <string>
18 #include <type_traits>
19 #include <utility>
20
21 #include "absl/strings/match.h"
22 #include "absl/strings/string_view.h"
23 #include "absl/types/optional.h"
24 #include "api/array_view.h"
25 #include "api/audio/audio_frame.h"
26 #include "common_audio/audio_converter.h"
27 #include "common_audio/include/audio_util.h"
28 #include "modules/audio_processing/aec_dump/aec_dump_factory.h"
29 #include "modules/audio_processing/audio_buffer.h"
30 #include "modules/audio_processing/include/audio_frame_view.h"
31 #include "modules/audio_processing/logging/apm_data_dumper.h"
32 #include "modules/audio_processing/optionally_built_submodule_creators.h"
33 #include "rtc_base/checks.h"
34 #include "rtc_base/experiments/field_trial_parser.h"
35 #include "rtc_base/logging.h"
36 #include "rtc_base/time_utils.h"
37 #include "rtc_base/trace_event.h"
38 #include "system_wrappers/include/denormal_disabler.h"
39 #include "system_wrappers/include/field_trial.h"
40 #include "system_wrappers/include/metrics.h"
41
// Evaluates `expr` exactly once; if the result is not `kNoError`, returns that
// result from the enclosing function. The do/while(0) wrapper makes the
// expansion behave like a single statement at the call site.
#define RETURN_ON_ERR(expr)          \
  do {                               \
    const int err = (expr);          \
    if (err != kNoError) return err; \
  } while (0)
49
50 namespace webrtc {
51
52 namespace {
53
SampleRateSupportsMultiBand(int sample_rate_hz)54 bool SampleRateSupportsMultiBand(int sample_rate_hz) {
55 return sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
56 sample_rate_hz == AudioProcessing::kSampleRate48kHz;
57 }
58
59 // Checks whether the high-pass filter should be done in the full-band.
EnforceSplitBandHpf()60 bool EnforceSplitBandHpf() {
61 return field_trial::IsEnabled("WebRTC-FullBandHpfKillSwitch");
62 }
63
64 // Checks whether AEC3 should be allowed to decide what the default
65 // configuration should be based on the render and capture channel configuration
66 // at hand.
UseSetupSpecificDefaultAec3Congfig()67 bool UseSetupSpecificDefaultAec3Congfig() {
68 return !field_trial::IsEnabled(
69 "WebRTC-Aec3SetupSpecificDefaultConfigDefaultsKillSwitch");
70 }
71
72 // If the "WebRTC-Audio-TransientSuppressorVadMode" field trial is unspecified,
73 // returns `TransientSuppressor::VadMode::kDefault`, otherwise parses the field
74 // trial and returns the specified mode:
75 // - WebRTC-Audio-TransientSuppressorVadMode/Enabled-Default returns `kDefault`;
76 // - WebRTC-Audio-TransientSuppressorVadMode/Enabled-RnnVad returns `kRnnVad`;
77 // - WebRTC-Audio-TransientSuppressorVadMode/Enabled-NoVad returns `kNoVad`.
GetTransientSuppressorVadMode()78 TransientSuppressor::VadMode GetTransientSuppressorVadMode() {
79 constexpr char kFieldTrial[] = "WebRTC-Audio-TransientSuppressorVadMode";
80 std::string full_name = webrtc::field_trial::FindFullName(kFieldTrial);
81 if (full_name.empty() || absl::EndsWith(full_name, "-Default")) {
82 return TransientSuppressor::VadMode::kDefault;
83 }
84 if (absl::EndsWith(full_name, "-RnnVad")) {
85 return TransientSuppressor::VadMode::kRnnVad;
86 }
87 if (absl::EndsWith(full_name, "-NoVad")) {
88 return TransientSuppressor::VadMode::kNoVad;
89 }
90 // Fallback to default.
91 RTC_LOG(LS_WARNING) << "Invalid parameter for " << kFieldTrial;
92 return TransientSuppressor::VadMode::kDefault;
93 }
94
95 // Identify the native processing rate that best handles a sample rate.
SuitableProcessRate(int minimum_rate,int max_splitting_rate,bool band_splitting_required)96 int SuitableProcessRate(int minimum_rate,
97 int max_splitting_rate,
98 bool band_splitting_required) {
99 const int uppermost_native_rate =
100 band_splitting_required ? max_splitting_rate : 48000;
101 for (auto rate : {16000, 32000, 48000}) {
102 if (rate >= uppermost_native_rate) {
103 return uppermost_native_rate;
104 }
105 if (rate >= minimum_rate) {
106 return rate;
107 }
108 }
109 RTC_DCHECK_NOTREACHED();
110 return uppermost_native_rate;
111 }
112
Agc1ConfigModeToInterfaceMode(AudioProcessing::Config::GainController1::Mode mode)113 GainControl::Mode Agc1ConfigModeToInterfaceMode(
114 AudioProcessing::Config::GainController1::Mode mode) {
115 using Agc1Config = AudioProcessing::Config::GainController1;
116 switch (mode) {
117 case Agc1Config::kAdaptiveAnalog:
118 return GainControl::kAdaptiveAnalog;
119 case Agc1Config::kAdaptiveDigital:
120 return GainControl::kAdaptiveDigital;
121 case Agc1Config::kFixedDigital:
122 return GainControl::kFixedDigital;
123 }
124 RTC_CHECK_NOTREACHED();
125 }
126
MinimizeProcessingForUnusedOutput()127 bool MinimizeProcessingForUnusedOutput() {
128 return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch");
129 }
130
// Upper bounds on the number of samples, per band and per frame, in the audio
// passed from the render side to the capture side (does not apply to AEC3).
constexpr size_t kMaxAllowedValuesOfSamplesPerBand = 160;
constexpr size_t kMaxAllowedValuesOfSamplesPerFrame = 480;

// Upper bound on the number of frames buffered in the render queue.
// TODO(peah): Decrease this once we properly handle hugely unbalanced
// reverse and forward call numbers.
constexpr size_t kMaxNumFramesToBuffer = 100;
140
PackRenderAudioBufferForEchoDetector(const AudioBuffer & audio,std::vector<float> & packed_buffer)141 void PackRenderAudioBufferForEchoDetector(const AudioBuffer& audio,
142 std::vector<float>& packed_buffer) {
143 packed_buffer.clear();
144 packed_buffer.insert(packed_buffer.end(), audio.channels_const()[0],
145 audio.channels_const()[0] + audio.num_frames());
146 }
147
// Ways of populating the output buffer when the requested audio formats
// cannot be processed.
enum class FormatErrorOutputOption {
  // Copy the input to the output unchanged.
  kOutputExactCopyOfInput,
  // Copy the first input channel into every output channel.
  kOutputBroadcastCopyOfFirstInputChannel,
  // Fill the output with zeros.
  kOutputSilence,
  // Leave the output buffer untouched.
  kDoNothing,
};
155
// Classification of a stream format with respect to what APM can handle.
enum class AudioFormatValidity {
  // The format is supported by APM.
  kValidAndSupported,
  // The format has a reasonable interpretation, but its sample rate is
  // outside the supported range.
  kValidButUnsupportedSampleRate,
  // The values below signal that the audio has no reasonable interpretation
  // and cannot be used.
  kInvalidSampleRate,
  kInvalidChannelCount,
};
166
ValidateAudioFormat(const StreamConfig & config)167 AudioFormatValidity ValidateAudioFormat(const StreamConfig& config) {
168 if (config.sample_rate_hz() < 0)
169 return AudioFormatValidity::kInvalidSampleRate;
170 if (config.num_channels() == 0)
171 return AudioFormatValidity::kInvalidChannelCount;
172
173 // Format has a reasonable interpretation, but may still be unsupported.
174 if (config.sample_rate_hz() < 8000 ||
175 config.sample_rate_hz() > AudioBuffer::kMaxSampleRate)
176 return AudioFormatValidity::kValidButUnsupportedSampleRate;
177
178 // Format is fully supported.
179 return AudioFormatValidity::kValidAndSupported;
180 }
181
AudioFormatValidityToErrorCode(AudioFormatValidity validity)182 int AudioFormatValidityToErrorCode(AudioFormatValidity validity) {
183 switch (validity) {
184 case AudioFormatValidity::kValidAndSupported:
185 return AudioProcessing::kNoError;
186 case AudioFormatValidity::kValidButUnsupportedSampleRate: // fall-through
187 case AudioFormatValidity::kInvalidSampleRate:
188 return AudioProcessing::kBadSampleRateError;
189 case AudioFormatValidity::kInvalidChannelCount:
190 return AudioProcessing::kBadNumberChannelsError;
191 }
192 RTC_DCHECK(false);
193 }
194
195 // Returns an AudioProcessing::Error together with the best possible option for
196 // output audio content.
ChooseErrorOutputOption(const StreamConfig & input_config,const StreamConfig & output_config)197 std::pair<int, FormatErrorOutputOption> ChooseErrorOutputOption(
198 const StreamConfig& input_config,
199 const StreamConfig& output_config) {
200 AudioFormatValidity input_validity = ValidateAudioFormat(input_config);
201 AudioFormatValidity output_validity = ValidateAudioFormat(output_config);
202
203 if (input_validity == AudioFormatValidity::kValidAndSupported &&
204 output_validity == AudioFormatValidity::kValidAndSupported &&
205 (output_config.num_channels() == 1 ||
206 output_config.num_channels() == input_config.num_channels())) {
207 return {AudioProcessing::kNoError, FormatErrorOutputOption::kDoNothing};
208 }
209
210 int error_code = AudioFormatValidityToErrorCode(input_validity);
211 if (error_code == AudioProcessing::kNoError) {
212 error_code = AudioFormatValidityToErrorCode(output_validity);
213 }
214 if (error_code == AudioProcessing::kNoError) {
215 // The individual formats are valid but there is some error - must be
216 // channel mismatch.
217 error_code = AudioProcessing::kBadNumberChannelsError;
218 }
219
220 FormatErrorOutputOption output_option;
221 if (output_validity != AudioFormatValidity::kValidAndSupported &&
222 output_validity != AudioFormatValidity::kValidButUnsupportedSampleRate) {
223 // The output format is uninterpretable: cannot do anything.
224 output_option = FormatErrorOutputOption::kDoNothing;
225 } else if (input_validity != AudioFormatValidity::kValidAndSupported &&
226 input_validity !=
227 AudioFormatValidity::kValidButUnsupportedSampleRate) {
228 // The input format is uninterpretable: cannot use it, must output silence.
229 output_option = FormatErrorOutputOption::kOutputSilence;
230 } else if (input_config.sample_rate_hz() != output_config.sample_rate_hz()) {
231 // Sample rates do not match: Cannot copy input into output, output silence.
232 // Note: If the sample rates are in a supported range, we could resample.
233 // However, that would significantly increase complexity of this error
234 // handling code.
235 output_option = FormatErrorOutputOption::kOutputSilence;
236 } else if (input_config.num_channels() != output_config.num_channels()) {
237 // Channel counts do not match: We cannot easily map input channels to
238 // output channels.
239 output_option =
240 FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel;
241 } else {
242 // The formats match exactly.
243 RTC_DCHECK(input_config == output_config);
244 output_option = FormatErrorOutputOption::kOutputExactCopyOfInput;
245 }
246 return std::make_pair(error_code, output_option);
247 }
248
249 // Checks if the audio format is supported. If not, the output is populated in a
250 // best-effort manner and an APM error code is returned.
HandleUnsupportedAudioFormats(const int16_t * const src,const StreamConfig & input_config,const StreamConfig & output_config,int16_t * const dest)251 int HandleUnsupportedAudioFormats(const int16_t* const src,
252 const StreamConfig& input_config,
253 const StreamConfig& output_config,
254 int16_t* const dest) {
255 RTC_DCHECK(src);
256 RTC_DCHECK(dest);
257
258 auto [error_code, output_option] =
259 ChooseErrorOutputOption(input_config, output_config);
260 if (error_code == AudioProcessing::kNoError)
261 return AudioProcessing::kNoError;
262
263 const size_t num_output_channels = output_config.num_channels();
264 switch (output_option) {
265 case FormatErrorOutputOption::kOutputSilence:
266 memset(dest, 0, output_config.num_samples() * sizeof(int16_t));
267 break;
268 case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel:
269 for (size_t i = 0; i < output_config.num_frames(); ++i) {
270 int16_t sample = src[input_config.num_channels() * i];
271 for (size_t ch = 0; ch < num_output_channels; ++ch) {
272 dest[ch + num_output_channels * i] = sample;
273 }
274 }
275 break;
276 case FormatErrorOutputOption::kOutputExactCopyOfInput:
277 memcpy(dest, src, output_config.num_samples() * sizeof(int16_t));
278 break;
279 case FormatErrorOutputOption::kDoNothing:
280 break;
281 }
282 return error_code;
283 }
284
285 // Checks if the audio format is supported. If not, the output is populated in a
286 // best-effort manner and an APM error code is returned.
HandleUnsupportedAudioFormats(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config,float * const * dest)287 int HandleUnsupportedAudioFormats(const float* const* src,
288 const StreamConfig& input_config,
289 const StreamConfig& output_config,
290 float* const* dest) {
291 RTC_DCHECK(src);
292 RTC_DCHECK(dest);
293 for (size_t i = 0; i < input_config.num_channels(); ++i) {
294 RTC_DCHECK(src[i]);
295 }
296 for (size_t i = 0; i < output_config.num_channels(); ++i) {
297 RTC_DCHECK(dest[i]);
298 }
299
300 auto [error_code, output_option] =
301 ChooseErrorOutputOption(input_config, output_config);
302 if (error_code == AudioProcessing::kNoError)
303 return AudioProcessing::kNoError;
304
305 const size_t num_output_channels = output_config.num_channels();
306 switch (output_option) {
307 case FormatErrorOutputOption::kOutputSilence:
308 for (size_t ch = 0; ch < num_output_channels; ++ch) {
309 memset(dest[ch], 0, output_config.num_frames() * sizeof(float));
310 }
311 break;
312 case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel:
313 for (size_t ch = 0; ch < num_output_channels; ++ch) {
314 memcpy(dest[ch], src[0], output_config.num_frames() * sizeof(float));
315 }
316 break;
317 case FormatErrorOutputOption::kOutputExactCopyOfInput:
318 for (size_t ch = 0; ch < num_output_channels; ++ch) {
319 memcpy(dest[ch], src[ch], output_config.num_frames() * sizeof(float));
320 }
321 break;
322 case FormatErrorOutputOption::kDoNothing:
323 break;
324 }
325 return error_code;
326 }
327
328 const absl::optional<InputVolumeController::Config>
GetInputVolumeControllerConfigOverride()329 GetInputVolumeControllerConfigOverride() {
330 constexpr char kInputVolumeControllerFieldTrial[] =
331 "WebRTC-Audio-InputVolumeControllerExperiment";
332
333 if (!field_trial::IsEnabled(kInputVolumeControllerFieldTrial)) {
334 return absl::nullopt;
335 }
336
337 constexpr InputVolumeController::Config kDefaultConfig;
338
339 FieldTrialFlag enabled("Enabled", false);
340 FieldTrialConstrained<int> clipped_level_min(
341 "clipped_level_min", kDefaultConfig.clipped_level_min, 0, 255);
342 FieldTrialConstrained<int> clipped_level_step(
343 "clipped_level_step", kDefaultConfig.clipped_level_step, 0, 255);
344 FieldTrialConstrained<double> clipped_ratio_threshold(
345 "clipped_ratio_threshold", kDefaultConfig.clipped_ratio_threshold, 0, 1);
346 FieldTrialConstrained<int> clipped_wait_frames(
347 "clipped_wait_frames", kDefaultConfig.clipped_wait_frames, 0,
348 absl::nullopt);
349 FieldTrialParameter<bool> enable_clipping_predictor(
350 "enable_clipping_predictor", kDefaultConfig.enable_clipping_predictor);
351 FieldTrialConstrained<int> target_range_max_dbfs(
352 "target_range_max_dbfs", kDefaultConfig.target_range_max_dbfs, -90, 30);
353 FieldTrialConstrained<int> target_range_min_dbfs(
354 "target_range_min_dbfs", kDefaultConfig.target_range_min_dbfs, -90, 30);
355 FieldTrialConstrained<int> update_input_volume_wait_frames(
356 "update_input_volume_wait_frames",
357 kDefaultConfig.update_input_volume_wait_frames, 0, absl::nullopt);
358 FieldTrialConstrained<double> speech_probability_threshold(
359 "speech_probability_threshold",
360 kDefaultConfig.speech_probability_threshold, 0, 1);
361 FieldTrialConstrained<double> speech_ratio_threshold(
362 "speech_ratio_threshold", kDefaultConfig.speech_ratio_threshold, 0, 1);
363
364 // Field-trial based override for the input volume controller config.
365 const std::string field_trial_name =
366 field_trial::FindFullName(kInputVolumeControllerFieldTrial);
367
368 ParseFieldTrial({&enabled, &clipped_level_min, &clipped_level_step,
369 &clipped_ratio_threshold, &clipped_wait_frames,
370 &enable_clipping_predictor, &target_range_max_dbfs,
371 &target_range_min_dbfs, &update_input_volume_wait_frames,
372 &speech_probability_threshold, &speech_ratio_threshold},
373 field_trial_name);
374
375 // Checked already by `IsEnabled()` before parsing, therefore always true.
376 RTC_DCHECK(enabled);
377
378 return InputVolumeController::Config{
379 .clipped_level_min = static_cast<int>(clipped_level_min.Get()),
380 .clipped_level_step = static_cast<int>(clipped_level_step.Get()),
381 .clipped_ratio_threshold =
382 static_cast<float>(clipped_ratio_threshold.Get()),
383 .clipped_wait_frames = static_cast<int>(clipped_wait_frames.Get()),
384 .enable_clipping_predictor =
385 static_cast<bool>(enable_clipping_predictor.Get()),
386 .target_range_max_dbfs = static_cast<int>(target_range_max_dbfs.Get()),
387 .target_range_min_dbfs = static_cast<int>(target_range_min_dbfs.Get()),
388 .update_input_volume_wait_frames =
389 static_cast<int>(update_input_volume_wait_frames.Get()),
390 .speech_probability_threshold =
391 static_cast<float>(speech_probability_threshold.Get()),
392 .speech_ratio_threshold =
393 static_cast<float>(speech_ratio_threshold.Get()),
394 };
395 }
396
397 // Switches all gain control to AGC2 if experimenting with input volume
398 // controller.
AdjustConfig(const AudioProcessing::Config & config,const absl::optional<InputVolumeController::Config> & input_volume_controller_config_override)399 const AudioProcessing::Config AdjustConfig(
400 const AudioProcessing::Config& config,
401 const absl::optional<InputVolumeController::Config>&
402 input_volume_controller_config_override) {
403 const bool analog_agc_enabled =
404 config.gain_controller1.enabled &&
405 (config.gain_controller1.mode ==
406 AudioProcessing::Config::GainController1::kAdaptiveAnalog ||
407 config.gain_controller1.analog_gain_controller.enabled);
408
409 // Do not update the config if none of the analog AGCs is active
410 // regardless of the input volume controller override.
411 if (!analog_agc_enabled ||
412 !input_volume_controller_config_override.has_value()) {
413 return config;
414 }
415
416 const bool hybrid_agc_config_detected =
417 config.gain_controller1.enabled &&
418 config.gain_controller1.analog_gain_controller.enabled &&
419 !config.gain_controller1.analog_gain_controller.enable_digital_adaptive &&
420 config.gain_controller2.enabled &&
421 config.gain_controller2.adaptive_digital.enabled;
422
423 const bool full_agc1_config_detected =
424 config.gain_controller1.enabled &&
425 config.gain_controller1.analog_gain_controller.enabled &&
426 config.gain_controller1.analog_gain_controller.enable_digital_adaptive &&
427 !config.gain_controller2.enabled;
428
429 if (hybrid_agc_config_detected == full_agc1_config_detected ||
430 config.gain_controller2.input_volume_controller.enabled) {
431 RTC_LOG(LS_ERROR) << "Unexpected AGC config: Config not adjusted.";
432 return config;
433 }
434
435 AudioProcessing::Config adjusted_config = config;
436 adjusted_config.gain_controller1.enabled = false;
437 adjusted_config.gain_controller1.analog_gain_controller.enabled = false;
438 adjusted_config.gain_controller2.enabled = true;
439 adjusted_config.gain_controller2.adaptive_digital.enabled = true;
440 adjusted_config.gain_controller2.input_volume_controller.enabled = true;
441
442 return adjusted_config;
443 }
444
445 using DownmixMethod = AudioProcessing::Config::Pipeline::DownmixMethod;
446
SetDownmixMethod(AudioBuffer & buffer,DownmixMethod method)447 void SetDownmixMethod(AudioBuffer& buffer, DownmixMethod method) {
448 switch (method) {
449 case DownmixMethod::kAverageChannels:
450 buffer.set_downmixing_by_averaging();
451 break;
452 case DownmixMethod::kUseFirstChannel:
453 buffer.set_downmixing_to_specific_channel(/*channel=*/0);
454 break;
455 }
456 }
457
// NOTE(review): presumably the placeholder written to data dumps when no input
// volume is known; the use sites are outside this part of the file - confirm.
constexpr int kUnspecifiedDataDumpInputVolume{-100};
459
460 } // namespace
461
462 // Throughout webrtc, it's assumed that success is represented by zero.
463 static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
464
SubmoduleStates(bool capture_post_processor_enabled,bool render_pre_processor_enabled,bool capture_analyzer_enabled)465 AudioProcessingImpl::SubmoduleStates::SubmoduleStates(
466 bool capture_post_processor_enabled,
467 bool render_pre_processor_enabled,
468 bool capture_analyzer_enabled)
469 : capture_post_processor_enabled_(capture_post_processor_enabled),
470 render_pre_processor_enabled_(render_pre_processor_enabled),
471 capture_analyzer_enabled_(capture_analyzer_enabled) {}
472
Update(bool high_pass_filter_enabled,bool mobile_echo_controller_enabled,bool noise_suppressor_enabled,bool adaptive_gain_controller_enabled,bool gain_controller2_enabled,bool voice_activity_detector_enabled,bool gain_adjustment_enabled,bool echo_controller_enabled,bool transient_suppressor_enabled)473 bool AudioProcessingImpl::SubmoduleStates::Update(
474 bool high_pass_filter_enabled,
475 bool mobile_echo_controller_enabled,
476 bool noise_suppressor_enabled,
477 bool adaptive_gain_controller_enabled,
478 bool gain_controller2_enabled,
479 bool voice_activity_detector_enabled,
480 bool gain_adjustment_enabled,
481 bool echo_controller_enabled,
482 bool transient_suppressor_enabled) {
483 bool changed = false;
484 changed |= (high_pass_filter_enabled != high_pass_filter_enabled_);
485 changed |=
486 (mobile_echo_controller_enabled != mobile_echo_controller_enabled_);
487 changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
488 changed |=
489 (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
490 changed |= (gain_controller2_enabled != gain_controller2_enabled_);
491 changed |=
492 (voice_activity_detector_enabled != voice_activity_detector_enabled_);
493 changed |= (gain_adjustment_enabled != gain_adjustment_enabled_);
494 changed |= (echo_controller_enabled != echo_controller_enabled_);
495 changed |= (transient_suppressor_enabled != transient_suppressor_enabled_);
496 if (changed) {
497 high_pass_filter_enabled_ = high_pass_filter_enabled;
498 mobile_echo_controller_enabled_ = mobile_echo_controller_enabled;
499 noise_suppressor_enabled_ = noise_suppressor_enabled;
500 adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
501 gain_controller2_enabled_ = gain_controller2_enabled;
502 voice_activity_detector_enabled_ = voice_activity_detector_enabled;
503 gain_adjustment_enabled_ = gain_adjustment_enabled;
504 echo_controller_enabled_ = echo_controller_enabled;
505 transient_suppressor_enabled_ = transient_suppressor_enabled;
506 }
507
508 changed |= first_update_;
509 first_update_ = false;
510 return changed;
511 }
512
CaptureMultiBandSubModulesActive() const513 bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandSubModulesActive()
514 const {
515 return CaptureMultiBandProcessingPresent();
516 }
517
CaptureMultiBandProcessingPresent() const518 bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingPresent()
519 const {
520 // If echo controller is present, assume it performs active processing.
521 return CaptureMultiBandProcessingActive(/*ec_processing_active=*/true);
522 }
523
CaptureMultiBandProcessingActive(bool ec_processing_active) const524 bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingActive(
525 bool ec_processing_active) const {
526 return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
527 noise_suppressor_enabled_ || adaptive_gain_controller_enabled_ ||
528 (echo_controller_enabled_ && ec_processing_active);
529 }
530
CaptureFullBandProcessingActive() const531 bool AudioProcessingImpl::SubmoduleStates::CaptureFullBandProcessingActive()
532 const {
533 return gain_controller2_enabled_ || capture_post_processor_enabled_ ||
534 gain_adjustment_enabled_;
535 }
536
// Returns the capture-analyzer flag that was passed to the constructor.
bool AudioProcessingImpl::SubmoduleStates::CaptureAnalyzerActive() const {
  return capture_analyzer_enabled_;
}
540
RenderMultiBandSubModulesActive() const541 bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandSubModulesActive()
542 const {
543 return RenderMultiBandProcessingActive() || mobile_echo_controller_enabled_ ||
544 adaptive_gain_controller_enabled_ || echo_controller_enabled_;
545 }
546
// Returns the render pre-processor flag that was passed to the constructor.
bool AudioProcessingImpl::SubmoduleStates::RenderFullBandProcessingActive()
    const {
  return render_pre_processor_enabled_;
}
551
// Always returns false: this implementation reports no multi-band processing
// on the render side.
bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandProcessingActive()
    const {
  return false;
}
556
HighPassFilteringRequired() const557 bool AudioProcessingImpl::SubmoduleStates::HighPassFilteringRequired() const {
558 return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
559 noise_suppressor_enabled_;
560 }
561
AudioProcessingImpl()562 AudioProcessingImpl::AudioProcessingImpl()
563 : AudioProcessingImpl(/*config=*/{},
564 /*capture_post_processor=*/nullptr,
565 /*render_pre_processor=*/nullptr,
566 /*echo_control_factory=*/nullptr,
567 /*echo_detector=*/nullptr,
568 /*capture_analyzer=*/nullptr) {}
569
570 std::atomic<int> AudioProcessingImpl::instance_count_(0);
571
AudioProcessingImpl(const AudioProcessing::Config & config,std::unique_ptr<CustomProcessing> capture_post_processor,std::unique_ptr<CustomProcessing> render_pre_processor,std::unique_ptr<EchoControlFactory> echo_control_factory,rtc::scoped_refptr<EchoDetector> echo_detector,std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)572 AudioProcessingImpl::AudioProcessingImpl(
573 const AudioProcessing::Config& config,
574 std::unique_ptr<CustomProcessing> capture_post_processor,
575 std::unique_ptr<CustomProcessing> render_pre_processor,
576 std::unique_ptr<EchoControlFactory> echo_control_factory,
577 rtc::scoped_refptr<EchoDetector> echo_detector,
578 std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)
579 : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
580 use_setup_specific_default_aec3_config_(
581 UseSetupSpecificDefaultAec3Congfig()),
582 input_volume_controller_config_override_(
583 GetInputVolumeControllerConfigOverride()),
584 use_denormal_disabler_(
585 !field_trial::IsEnabled("WebRTC-ApmDenormalDisablerKillSwitch")),
586 transient_suppressor_vad_mode_(GetTransientSuppressorVadMode()),
587 capture_runtime_settings_(RuntimeSettingQueueSize()),
588 render_runtime_settings_(RuntimeSettingQueueSize()),
589 capture_runtime_settings_enqueuer_(&capture_runtime_settings_),
590 render_runtime_settings_enqueuer_(&render_runtime_settings_),
591 echo_control_factory_(std::move(echo_control_factory)),
592 config_(AdjustConfig(config, input_volume_controller_config_override_)),
593 submodule_states_(!!capture_post_processor,
594 !!render_pre_processor,
595 !!capture_analyzer),
596 submodules_(std::move(capture_post_processor),
597 std::move(render_pre_processor),
598 std::move(echo_detector),
599 std::move(capture_analyzer)),
600 constants_(!field_trial::IsEnabled(
601 "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
602 !field_trial::IsEnabled(
603 "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
604 EnforceSplitBandHpf(),
605 MinimizeProcessingForUnusedOutput(),
606 field_trial::IsEnabled("WebRTC-TransientSuppressorForcedOff")),
607 capture_(),
608 capture_nonlocked_(),
609 applied_input_volume_stats_reporter_(
610 InputVolumeStatsReporter::InputVolumeType::kApplied),
611 recommended_input_volume_stats_reporter_(
612 InputVolumeStatsReporter::InputVolumeType::kRecommended) {
613 RTC_LOG(LS_INFO) << "Injected APM submodules:"
614 "\nEcho control factory: "
615 << !!echo_control_factory_
616 << "\nEcho detector: " << !!submodules_.echo_detector
617 << "\nCapture analyzer: " << !!submodules_.capture_analyzer
618 << "\nCapture post processor: "
619 << !!submodules_.capture_post_processor
620 << "\nRender pre processor: "
621 << !!submodules_.render_pre_processor;
622 if (!DenormalDisabler::IsSupported()) {
623 RTC_LOG(LS_INFO) << "Denormal disabler unsupported";
624 }
625
626 RTC_LOG(LS_INFO) << "AudioProcessing: " << config_.ToString();
627
628 // Mark Echo Controller enabled if a factory is injected.
629 capture_nonlocked_.echo_controller_enabled =
630 static_cast<bool>(echo_control_factory_);
631
632 Initialize();
633 }
634
// Defaulted destructor, defined out of line in this file.
AudioProcessingImpl::~AudioProcessingImpl() = default;
636
Initialize()637 int AudioProcessingImpl::Initialize() {
638 // Run in a single-threaded manner during initialization.
639 MutexLock lock_render(&mutex_render_);
640 MutexLock lock_capture(&mutex_capture_);
641 InitializeLocked();
642 return kNoError;
643 }
644
Initialize(const ProcessingConfig & processing_config)645 int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
646 // Run in a single-threaded manner during initialization.
647 MutexLock lock_render(&mutex_render_);
648 MutexLock lock_capture(&mutex_capture_);
649 InitializeLocked(processing_config);
650 return kNoError;
651 }
652
MaybeInitializeRender(const StreamConfig & input_config,const StreamConfig & output_config)653 void AudioProcessingImpl::MaybeInitializeRender(
654 const StreamConfig& input_config,
655 const StreamConfig& output_config) {
656 ProcessingConfig processing_config = formats_.api_format;
657 processing_config.reverse_input_stream() = input_config;
658 processing_config.reverse_output_stream() = output_config;
659
660 if (processing_config == formats_.api_format) {
661 return;
662 }
663
664 MutexLock lock_capture(&mutex_capture_);
665 InitializeLocked(processing_config);
666 }
667
InitializeLocked()668 void AudioProcessingImpl::InitializeLocked() {
669 UpdateActiveSubmoduleStates();
670
671 const int render_audiobuffer_sample_rate_hz =
672 formats_.api_format.reverse_output_stream().num_frames() == 0
673 ? formats_.render_processing_format.sample_rate_hz()
674 : formats_.api_format.reverse_output_stream().sample_rate_hz();
675 if (formats_.api_format.reverse_input_stream().num_channels() > 0) {
676 render_.render_audio.reset(new AudioBuffer(
677 formats_.api_format.reverse_input_stream().sample_rate_hz(),
678 formats_.api_format.reverse_input_stream().num_channels(),
679 formats_.render_processing_format.sample_rate_hz(),
680 formats_.render_processing_format.num_channels(),
681 render_audiobuffer_sample_rate_hz,
682 formats_.render_processing_format.num_channels()));
683 if (formats_.api_format.reverse_input_stream() !=
684 formats_.api_format.reverse_output_stream()) {
685 render_.render_converter = AudioConverter::Create(
686 formats_.api_format.reverse_input_stream().num_channels(),
687 formats_.api_format.reverse_input_stream().num_frames(),
688 formats_.api_format.reverse_output_stream().num_channels(),
689 formats_.api_format.reverse_output_stream().num_frames());
690 } else {
691 render_.render_converter.reset(nullptr);
692 }
693 } else {
694 render_.render_audio.reset(nullptr);
695 render_.render_converter.reset(nullptr);
696 }
697
698 capture_.capture_audio.reset(new AudioBuffer(
699 formats_.api_format.input_stream().sample_rate_hz(),
700 formats_.api_format.input_stream().num_channels(),
701 capture_nonlocked_.capture_processing_format.sample_rate_hz(),
702 formats_.api_format.output_stream().num_channels(),
703 formats_.api_format.output_stream().sample_rate_hz(),
704 formats_.api_format.output_stream().num_channels()));
705 SetDownmixMethod(*capture_.capture_audio,
706 config_.pipeline.capture_downmix_method);
707
708 if (capture_nonlocked_.capture_processing_format.sample_rate_hz() <
709 formats_.api_format.output_stream().sample_rate_hz() &&
710 formats_.api_format.output_stream().sample_rate_hz() == 48000) {
711 capture_.capture_fullband_audio.reset(
712 new AudioBuffer(formats_.api_format.input_stream().sample_rate_hz(),
713 formats_.api_format.input_stream().num_channels(),
714 formats_.api_format.output_stream().sample_rate_hz(),
715 formats_.api_format.output_stream().num_channels(),
716 formats_.api_format.output_stream().sample_rate_hz(),
717 formats_.api_format.output_stream().num_channels()));
718 SetDownmixMethod(*capture_.capture_fullband_audio,
719 config_.pipeline.capture_downmix_method);
720 } else {
721 capture_.capture_fullband_audio.reset();
722 }
723
724 AllocateRenderQueue();
725
726 InitializeGainController1();
727 InitializeTransientSuppressor();
728 InitializeHighPassFilter(true);
729 InitializeResidualEchoDetector();
730 InitializeEchoController();
731 InitializeGainController2(/*config_has_changed=*/true);
732 InitializeVoiceActivityDetector(/*config_has_changed=*/true);
733 InitializeNoiseSuppressor();
734 InitializeAnalyzer();
735 InitializePostProcessor();
736 InitializePreProcessor();
737 InitializeCaptureLevelsAdjuster();
738
739 if (aec_dump_) {
740 aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis());
741 }
742 }
743
InitializeLocked(const ProcessingConfig & config)744 void AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
745 UpdateActiveSubmoduleStates();
746
747 formats_.api_format = config;
748
749 // Choose maximum rate to use for the split filtering.
750 RTC_DCHECK(config_.pipeline.maximum_internal_processing_rate == 48000 ||
751 config_.pipeline.maximum_internal_processing_rate == 32000);
752 int max_splitting_rate = 48000;
753 if (config_.pipeline.maximum_internal_processing_rate == 32000) {
754 max_splitting_rate = config_.pipeline.maximum_internal_processing_rate;
755 }
756
757 int capture_processing_rate = SuitableProcessRate(
758 std::min(formats_.api_format.input_stream().sample_rate_hz(),
759 formats_.api_format.output_stream().sample_rate_hz()),
760 max_splitting_rate,
761 submodule_states_.CaptureMultiBandSubModulesActive() ||
762 submodule_states_.RenderMultiBandSubModulesActive());
763 RTC_DCHECK_NE(8000, capture_processing_rate);
764
765 capture_nonlocked_.capture_processing_format =
766 StreamConfig(capture_processing_rate);
767
768 int render_processing_rate;
769 if (!capture_nonlocked_.echo_controller_enabled) {
770 render_processing_rate = SuitableProcessRate(
771 std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(),
772 formats_.api_format.reverse_output_stream().sample_rate_hz()),
773 max_splitting_rate,
774 submodule_states_.CaptureMultiBandSubModulesActive() ||
775 submodule_states_.RenderMultiBandSubModulesActive());
776 } else {
777 render_processing_rate = capture_processing_rate;
778 }
779
780 // If the forward sample rate is 8 kHz, the render stream is also processed
781 // at this rate.
782 if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
783 kSampleRate8kHz) {
784 render_processing_rate = kSampleRate8kHz;
785 } else {
786 render_processing_rate =
787 std::max(render_processing_rate, static_cast<int>(kSampleRate16kHz));
788 }
789
790 RTC_DCHECK_NE(8000, render_processing_rate);
791
792 if (submodule_states_.RenderMultiBandSubModulesActive()) {
793 // By default, downmix the render stream to mono for analysis. This has been
794 // demonstrated to work well for AEC in most practical scenarios.
795 const bool multi_channel_render = config_.pipeline.multi_channel_render &&
796 constants_.multi_channel_render_support;
797 int render_processing_num_channels =
798 multi_channel_render
799 ? formats_.api_format.reverse_input_stream().num_channels()
800 : 1;
801 formats_.render_processing_format =
802 StreamConfig(render_processing_rate, render_processing_num_channels);
803 } else {
804 formats_.render_processing_format = StreamConfig(
805 formats_.api_format.reverse_input_stream().sample_rate_hz(),
806 formats_.api_format.reverse_input_stream().num_channels());
807 }
808
809 if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
810 kSampleRate32kHz ||
811 capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
812 kSampleRate48kHz) {
813 capture_nonlocked_.split_rate = kSampleRate16kHz;
814 } else {
815 capture_nonlocked_.split_rate =
816 capture_nonlocked_.capture_processing_format.sample_rate_hz();
817 }
818
819 InitializeLocked();
820 }
821
ApplyConfig(const AudioProcessing::Config & config)822 void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
823 // Run in a single-threaded manner when applying the settings.
824 MutexLock lock_render(&mutex_render_);
825 MutexLock lock_capture(&mutex_capture_);
826
827 // TODO(bugs.webrtc.org/7494): Replace `adjusted_config` with `config` after
828 // "WebRTC-Audio-InputVolumeControllerExperiment" field trial is removed.
829 const auto adjusted_config =
830 AdjustConfig(config, input_volume_controller_config_override_);
831
832 RTC_LOG(LS_INFO) << "AudioProcessing::ApplyConfig: "
833 << adjusted_config.ToString();
834
835 const bool pipeline_config_changed =
836 config_.pipeline.multi_channel_render !=
837 adjusted_config.pipeline.multi_channel_render ||
838 config_.pipeline.multi_channel_capture !=
839 adjusted_config.pipeline.multi_channel_capture ||
840 config_.pipeline.maximum_internal_processing_rate !=
841 adjusted_config.pipeline.maximum_internal_processing_rate ||
842 config_.pipeline.capture_downmix_method !=
843 adjusted_config.pipeline.capture_downmix_method;
844
845 const bool aec_config_changed =
846 config_.echo_canceller.enabled !=
847 adjusted_config.echo_canceller.enabled ||
848 config_.echo_canceller.mobile_mode !=
849 adjusted_config.echo_canceller.mobile_mode;
850
851 const bool agc1_config_changed =
852 config_.gain_controller1 != adjusted_config.gain_controller1;
853
854 const bool agc2_config_changed =
855 config_.gain_controller2 != adjusted_config.gain_controller2;
856
857 const bool ns_config_changed =
858 config_.noise_suppression.enabled !=
859 adjusted_config.noise_suppression.enabled ||
860 config_.noise_suppression.level !=
861 adjusted_config.noise_suppression.level;
862
863 const bool ts_config_changed = config_.transient_suppression.enabled !=
864 adjusted_config.transient_suppression.enabled;
865
866 const bool pre_amplifier_config_changed =
867 config_.pre_amplifier.enabled != adjusted_config.pre_amplifier.enabled ||
868 config_.pre_amplifier.fixed_gain_factor !=
869 adjusted_config.pre_amplifier.fixed_gain_factor;
870
871 const bool gain_adjustment_config_changed =
872 config_.capture_level_adjustment !=
873 adjusted_config.capture_level_adjustment;
874
875 config_ = adjusted_config;
876
877 if (aec_config_changed) {
878 InitializeEchoController();
879 }
880
881 if (ns_config_changed) {
882 InitializeNoiseSuppressor();
883 }
884
885 if (ts_config_changed) {
886 InitializeTransientSuppressor();
887 }
888
889 InitializeHighPassFilter(false);
890
891 if (agc1_config_changed) {
892 InitializeGainController1();
893 }
894
895 const bool config_ok = GainController2::Validate(config_.gain_controller2);
896 if (!config_ok) {
897 RTC_LOG(LS_ERROR)
898 << "Invalid Gain Controller 2 config; using the default config.";
899 config_.gain_controller2 = AudioProcessing::Config::GainController2();
900 }
901
902 InitializeGainController2(agc2_config_changed);
903 InitializeVoiceActivityDetector(agc2_config_changed);
904
905 if (pre_amplifier_config_changed || gain_adjustment_config_changed) {
906 InitializeCaptureLevelsAdjuster();
907 }
908
909 // Reinitialization must happen after all submodule configuration to avoid
910 // additional reinitializations on the next capture / render processing call.
911 if (pipeline_config_changed) {
912 InitializeLocked(formats_.api_format);
913 }
914 }
915
OverrideSubmoduleCreationForTesting(const ApmSubmoduleCreationOverrides & overrides)916 void AudioProcessingImpl::OverrideSubmoduleCreationForTesting(
917 const ApmSubmoduleCreationOverrides& overrides) {
918 MutexLock lock(&mutex_capture_);
919 submodule_creation_overrides_ = overrides;
920 }
921
proc_sample_rate_hz() const922 int AudioProcessingImpl::proc_sample_rate_hz() const {
923 // Used as callback from submodules, hence locking is not allowed.
924 return capture_nonlocked_.capture_processing_format.sample_rate_hz();
925 }
926
proc_fullband_sample_rate_hz() const927 int AudioProcessingImpl::proc_fullband_sample_rate_hz() const {
928 return capture_.capture_fullband_audio
929 ? capture_.capture_fullband_audio->num_frames() * 100
930 : capture_nonlocked_.capture_processing_format.sample_rate_hz();
931 }
932
proc_split_sample_rate_hz() const933 int AudioProcessingImpl::proc_split_sample_rate_hz() const {
934 // Used as callback from submodules, hence locking is not allowed.
935 return capture_nonlocked_.split_rate;
936 }
937
num_reverse_channels() const938 size_t AudioProcessingImpl::num_reverse_channels() const {
939 // Used as callback from submodules, hence locking is not allowed.
940 return formats_.render_processing_format.num_channels();
941 }
942
num_input_channels() const943 size_t AudioProcessingImpl::num_input_channels() const {
944 // Used as callback from submodules, hence locking is not allowed.
945 return formats_.api_format.input_stream().num_channels();
946 }
947
num_proc_channels() const948 size_t AudioProcessingImpl::num_proc_channels() const {
949 // Used as callback from submodules, hence locking is not allowed.
950 const bool multi_channel_capture = config_.pipeline.multi_channel_capture &&
951 constants_.multi_channel_capture_support;
952 if (capture_nonlocked_.echo_controller_enabled && !multi_channel_capture) {
953 return 1;
954 }
955 return num_output_channels();
956 }
957
num_output_channels() const958 size_t AudioProcessingImpl::num_output_channels() const {
959 // Used as callback from submodules, hence locking is not allowed.
960 return formats_.api_format.output_stream().num_channels();
961 }
962
set_output_will_be_muted(bool muted)963 void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
964 MutexLock lock(&mutex_capture_);
965 HandleCaptureOutputUsedSetting(!muted);
966 }
967
HandleCaptureOutputUsedSetting(bool capture_output_used)968 void AudioProcessingImpl::HandleCaptureOutputUsedSetting(
969 bool capture_output_used) {
970 capture_.capture_output_used =
971 capture_output_used || !constants_.minimize_processing_for_unused_output;
972
973 if (submodules_.agc_manager.get()) {
974 submodules_.agc_manager->HandleCaptureOutputUsedChange(
975 capture_.capture_output_used);
976 }
977 if (submodules_.echo_controller) {
978 submodules_.echo_controller->SetCaptureOutputUsage(
979 capture_.capture_output_used);
980 }
981 if (submodules_.noise_suppressor) {
982 submodules_.noise_suppressor->SetCaptureOutputUsage(
983 capture_.capture_output_used);
984 }
985 if (submodules_.gain_controller2) {
986 submodules_.gain_controller2->SetCaptureOutputUsed(
987 capture_.capture_output_used);
988 }
989 }
990
SetRuntimeSetting(RuntimeSetting setting)991 void AudioProcessingImpl::SetRuntimeSetting(RuntimeSetting setting) {
992 PostRuntimeSetting(setting);
993 }
994
PostRuntimeSetting(RuntimeSetting setting)995 bool AudioProcessingImpl::PostRuntimeSetting(RuntimeSetting setting) {
996 switch (setting.type()) {
997 case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
998 case RuntimeSetting::Type::kPlayoutAudioDeviceChange:
999 return render_runtime_settings_enqueuer_.Enqueue(setting);
1000 case RuntimeSetting::Type::kCapturePreGain:
1001 case RuntimeSetting::Type::kCapturePostGain:
1002 case RuntimeSetting::Type::kCaptureCompressionGain:
1003 case RuntimeSetting::Type::kCaptureFixedPostGain:
1004 case RuntimeSetting::Type::kCaptureOutputUsed:
1005 return capture_runtime_settings_enqueuer_.Enqueue(setting);
1006 case RuntimeSetting::Type::kPlayoutVolumeChange: {
1007 bool enqueueing_successful;
1008 enqueueing_successful =
1009 capture_runtime_settings_enqueuer_.Enqueue(setting);
1010 enqueueing_successful =
1011 render_runtime_settings_enqueuer_.Enqueue(setting) &&
1012 enqueueing_successful;
1013 return enqueueing_successful;
1014 }
1015 case RuntimeSetting::Type::kNotSpecified:
1016 RTC_DCHECK_NOTREACHED();
1017 return true;
1018 }
1019 // The language allows the enum to have a non-enumerator
1020 // value. Check that this doesn't happen.
1021 RTC_DCHECK_NOTREACHED();
1022 return true;
1023 }
1024
// Binds the enqueuer to the queue that `runtime_settings` points to.
// `runtime_settings` must not be null.
// NOTE(review): the pointer is dereferenced in the member initializer, i.e.
// before the RTC_DCHECK in the body gets a chance to run.
AudioProcessingImpl::RuntimeSettingEnqueuer::RuntimeSettingEnqueuer(
    SwapQueue<RuntimeSetting>* runtime_settings)
    : runtime_settings_(*runtime_settings) {
  RTC_DCHECK(runtime_settings);
}
1030
// Out-of-line defaulted destructor; no custom teardown is performed.
AudioProcessingImpl::RuntimeSettingEnqueuer::~RuntimeSettingEnqueuer() =
    default;
1033
Enqueue(RuntimeSetting setting)1034 bool AudioProcessingImpl::RuntimeSettingEnqueuer::Enqueue(
1035 RuntimeSetting setting) {
1036 const bool successful_insert = runtime_settings_.Insert(&setting);
1037
1038 if (!successful_insert) {
1039 RTC_LOG(LS_ERROR) << "Cannot enqueue a new runtime setting.";
1040 }
1041 return successful_insert;
1042 }
1043
MaybeInitializeCapture(const StreamConfig & input_config,const StreamConfig & output_config)1044 void AudioProcessingImpl::MaybeInitializeCapture(
1045 const StreamConfig& input_config,
1046 const StreamConfig& output_config) {
1047 ProcessingConfig processing_config;
1048 bool reinitialization_required = false;
1049 {
1050 // Acquire the capture lock in order to access api_format. The lock is
1051 // released immediately, as we may need to acquire the render lock as part
1052 // of the conditional reinitialization.
1053 MutexLock lock_capture(&mutex_capture_);
1054 processing_config = formats_.api_format;
1055 reinitialization_required = UpdateActiveSubmoduleStates();
1056 }
1057
1058 if (processing_config.input_stream() != input_config) {
1059 reinitialization_required = true;
1060 }
1061
1062 if (processing_config.output_stream() != output_config) {
1063 reinitialization_required = true;
1064 }
1065
1066 if (reinitialization_required) {
1067 MutexLock lock_render(&mutex_render_);
1068 MutexLock lock_capture(&mutex_capture_);
1069 // Reread the API format since the render format may have changed.
1070 processing_config = formats_.api_format;
1071 processing_config.input_stream() = input_config;
1072 processing_config.output_stream() = output_config;
1073 InitializeLocked(processing_config);
1074 }
1075 }
1076
ProcessStream(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config,float * const * dest)1077 int AudioProcessingImpl::ProcessStream(const float* const* src,
1078 const StreamConfig& input_config,
1079 const StreamConfig& output_config,
1080 float* const* dest) {
1081 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
1082 DenormalDisabler denormal_disabler(use_denormal_disabler_);
1083 RETURN_ON_ERR(
1084 HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
1085 MaybeInitializeCapture(input_config, output_config);
1086
1087 MutexLock lock_capture(&mutex_capture_);
1088
1089 if (aec_dump_) {
1090 RecordUnprocessedCaptureStream(src);
1091 }
1092
1093 capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
1094 if (capture_.capture_fullband_audio) {
1095 capture_.capture_fullband_audio->CopyFrom(
1096 src, formats_.api_format.input_stream());
1097 }
1098 RETURN_ON_ERR(ProcessCaptureStreamLocked());
1099 if (capture_.capture_fullband_audio) {
1100 capture_.capture_fullband_audio->CopyTo(formats_.api_format.output_stream(),
1101 dest);
1102 } else {
1103 capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
1104 }
1105
1106 if (aec_dump_) {
1107 RecordProcessedCaptureStream(dest);
1108 }
1109 return kNoError;
1110 }
1111
HandleCaptureRuntimeSettings()1112 void AudioProcessingImpl::HandleCaptureRuntimeSettings() {
1113 RuntimeSetting setting;
1114 int num_settings_processed = 0;
1115 while (capture_runtime_settings_.Remove(&setting)) {
1116 if (aec_dump_) {
1117 aec_dump_->WriteRuntimeSetting(setting);
1118 }
1119 switch (setting.type()) {
1120 case RuntimeSetting::Type::kCapturePreGain:
1121 if (config_.pre_amplifier.enabled ||
1122 config_.capture_level_adjustment.enabled) {
1123 float value;
1124 setting.GetFloat(&value);
1125 // If the pre-amplifier is used, apply the new gain to the
1126 // pre-amplifier regardless if the capture level adjustment is
1127 // activated. This approach allows both functionalities to coexist
1128 // until they have been properly merged.
1129 if (config_.pre_amplifier.enabled) {
1130 config_.pre_amplifier.fixed_gain_factor = value;
1131 } else {
1132 config_.capture_level_adjustment.pre_gain_factor = value;
1133 }
1134
1135 // Use both the pre-amplifier and the capture level adjustment gains
1136 // as pre-gains.
1137 float gain = 1.f;
1138 if (config_.pre_amplifier.enabled) {
1139 gain *= config_.pre_amplifier.fixed_gain_factor;
1140 }
1141 if (config_.capture_level_adjustment.enabled) {
1142 gain *= config_.capture_level_adjustment.pre_gain_factor;
1143 }
1144
1145 submodules_.capture_levels_adjuster->SetPreGain(gain);
1146 }
1147 // TODO(bugs.chromium.org/9138): Log setting handling by Aec Dump.
1148 break;
1149 case RuntimeSetting::Type::kCapturePostGain:
1150 if (config_.capture_level_adjustment.enabled) {
1151 float value;
1152 setting.GetFloat(&value);
1153 config_.capture_level_adjustment.post_gain_factor = value;
1154 submodules_.capture_levels_adjuster->SetPostGain(
1155 config_.capture_level_adjustment.post_gain_factor);
1156 }
1157 // TODO(bugs.chromium.org/9138): Log setting handling by Aec Dump.
1158 break;
1159 case RuntimeSetting::Type::kCaptureCompressionGain: {
1160 if (!submodules_.agc_manager &&
1161 !(submodules_.gain_controller2 &&
1162 config_.gain_controller2.input_volume_controller.enabled)) {
1163 float value;
1164 setting.GetFloat(&value);
1165 int int_value = static_cast<int>(value + .5f);
1166 config_.gain_controller1.compression_gain_db = int_value;
1167 if (submodules_.gain_control) {
1168 int error =
1169 submodules_.gain_control->set_compression_gain_db(int_value);
1170 RTC_DCHECK_EQ(kNoError, error);
1171 }
1172 }
1173 break;
1174 }
1175 case RuntimeSetting::Type::kCaptureFixedPostGain: {
1176 if (submodules_.gain_controller2) {
1177 float value;
1178 setting.GetFloat(&value);
1179 config_.gain_controller2.fixed_digital.gain_db = value;
1180 submodules_.gain_controller2->SetFixedGainDb(value);
1181 }
1182 break;
1183 }
1184 case RuntimeSetting::Type::kPlayoutVolumeChange: {
1185 int value;
1186 setting.GetInt(&value);
1187 capture_.playout_volume = value;
1188 break;
1189 }
1190 case RuntimeSetting::Type::kPlayoutAudioDeviceChange:
1191 RTC_DCHECK_NOTREACHED();
1192 break;
1193 case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
1194 RTC_DCHECK_NOTREACHED();
1195 break;
1196 case RuntimeSetting::Type::kNotSpecified:
1197 RTC_DCHECK_NOTREACHED();
1198 break;
1199 case RuntimeSetting::Type::kCaptureOutputUsed:
1200 bool value;
1201 setting.GetBool(&value);
1202 HandleCaptureOutputUsedSetting(value);
1203 break;
1204 }
1205 ++num_settings_processed;
1206 }
1207
1208 if (num_settings_processed >= RuntimeSettingQueueSize()) {
1209 // Handle overrun of the runtime settings queue, which likely will has
1210 // caused settings to be discarded.
1211 HandleOverrunInCaptureRuntimeSettingsQueue();
1212 }
1213 }
1214
HandleOverrunInCaptureRuntimeSettingsQueue()1215 void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() {
1216 // Fall back to a safe state for the case when a setting for capture output
1217 // usage setting has been missed.
1218 HandleCaptureOutputUsedSetting(/*capture_output_used=*/true);
1219 }
1220
HandleRenderRuntimeSettings()1221 void AudioProcessingImpl::HandleRenderRuntimeSettings() {
1222 RuntimeSetting setting;
1223 while (render_runtime_settings_.Remove(&setting)) {
1224 if (aec_dump_) {
1225 aec_dump_->WriteRuntimeSetting(setting);
1226 }
1227 switch (setting.type()) {
1228 case RuntimeSetting::Type::kPlayoutAudioDeviceChange: // fall-through
1229 case RuntimeSetting::Type::kPlayoutVolumeChange: // fall-through
1230 case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
1231 if (submodules_.render_pre_processor) {
1232 submodules_.render_pre_processor->SetRuntimeSetting(setting);
1233 }
1234 break;
1235 case RuntimeSetting::Type::kCapturePreGain: // fall-through
1236 case RuntimeSetting::Type::kCapturePostGain: // fall-through
1237 case RuntimeSetting::Type::kCaptureCompressionGain: // fall-through
1238 case RuntimeSetting::Type::kCaptureFixedPostGain: // fall-through
1239 case RuntimeSetting::Type::kCaptureOutputUsed: // fall-through
1240 case RuntimeSetting::Type::kNotSpecified:
1241 RTC_DCHECK_NOTREACHED();
1242 break;
1243 }
1244 }
1245 }
1246
QueueBandedRenderAudio(AudioBuffer * audio)1247 void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) {
1248 RTC_DCHECK_GE(160, audio->num_frames_per_band());
1249
1250 if (submodules_.echo_control_mobile) {
1251 EchoControlMobileImpl::PackRenderAudioBuffer(audio, num_output_channels(),
1252 num_reverse_channels(),
1253 &aecm_render_queue_buffer_);
1254 RTC_DCHECK(aecm_render_signal_queue_);
1255 // Insert the samples into the queue.
1256 if (!aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_)) {
1257 // The data queue is full and needs to be emptied.
1258 EmptyQueuedRenderAudio();
1259
1260 // Retry the insert (should always work).
1261 bool result =
1262 aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_);
1263 RTC_DCHECK(result);
1264 }
1265 }
1266
1267 if (!submodules_.agc_manager && submodules_.gain_control) {
1268 GainControlImpl::PackRenderAudioBuffer(*audio, &agc_render_queue_buffer_);
1269 // Insert the samples into the queue.
1270 if (!agc_render_signal_queue_->Insert(&agc_render_queue_buffer_)) {
1271 // The data queue is full and needs to be emptied.
1272 EmptyQueuedRenderAudio();
1273
1274 // Retry the insert (should always work).
1275 bool result = agc_render_signal_queue_->Insert(&agc_render_queue_buffer_);
1276 RTC_DCHECK(result);
1277 }
1278 }
1279 }
1280
QueueNonbandedRenderAudio(AudioBuffer * audio)1281 void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) {
1282 if (submodules_.echo_detector) {
1283 PackRenderAudioBufferForEchoDetector(*audio, red_render_queue_buffer_);
1284 RTC_DCHECK(red_render_signal_queue_);
1285 // Insert the samples into the queue.
1286 if (!red_render_signal_queue_->Insert(&red_render_queue_buffer_)) {
1287 // The data queue is full and needs to be emptied.
1288 EmptyQueuedRenderAudio();
1289
1290 // Retry the insert (should always work).
1291 bool result = red_render_signal_queue_->Insert(&red_render_queue_buffer_);
1292 RTC_DCHECK(result);
1293 }
1294 }
1295 }
1296
AllocateRenderQueue()1297 void AudioProcessingImpl::AllocateRenderQueue() {
1298 const size_t new_agc_render_queue_element_max_size =
1299 std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerBand);
1300
1301 const size_t new_red_render_queue_element_max_size =
1302 std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
1303
1304 // Reallocate the queues if the queue item sizes are too small to fit the
1305 // data to put in the queues.
1306
1307 if (agc_render_queue_element_max_size_ <
1308 new_agc_render_queue_element_max_size) {
1309 agc_render_queue_element_max_size_ = new_agc_render_queue_element_max_size;
1310
1311 std::vector<int16_t> template_queue_element(
1312 agc_render_queue_element_max_size_);
1313
1314 agc_render_signal_queue_.reset(
1315 new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
1316 kMaxNumFramesToBuffer, template_queue_element,
1317 RenderQueueItemVerifier<int16_t>(
1318 agc_render_queue_element_max_size_)));
1319
1320 agc_render_queue_buffer_.resize(agc_render_queue_element_max_size_);
1321 agc_capture_queue_buffer_.resize(agc_render_queue_element_max_size_);
1322 } else {
1323 agc_render_signal_queue_->Clear();
1324 }
1325
1326 if (submodules_.echo_detector) {
1327 if (red_render_queue_element_max_size_ <
1328 new_red_render_queue_element_max_size) {
1329 red_render_queue_element_max_size_ =
1330 new_red_render_queue_element_max_size;
1331
1332 std::vector<float> template_queue_element(
1333 red_render_queue_element_max_size_);
1334
1335 red_render_signal_queue_.reset(
1336 new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>(
1337 kMaxNumFramesToBuffer, template_queue_element,
1338 RenderQueueItemVerifier<float>(
1339 red_render_queue_element_max_size_)));
1340
1341 red_render_queue_buffer_.resize(red_render_queue_element_max_size_);
1342 red_capture_queue_buffer_.resize(red_render_queue_element_max_size_);
1343 } else {
1344 red_render_signal_queue_->Clear();
1345 }
1346 }
1347 }
1348
EmptyQueuedRenderAudio()1349 void AudioProcessingImpl::EmptyQueuedRenderAudio() {
1350 MutexLock lock_capture(&mutex_capture_);
1351 EmptyQueuedRenderAudioLocked();
1352 }
1353
EmptyQueuedRenderAudioLocked()1354 void AudioProcessingImpl::EmptyQueuedRenderAudioLocked() {
1355 if (submodules_.echo_control_mobile) {
1356 RTC_DCHECK(aecm_render_signal_queue_);
1357 while (aecm_render_signal_queue_->Remove(&aecm_capture_queue_buffer_)) {
1358 submodules_.echo_control_mobile->ProcessRenderAudio(
1359 aecm_capture_queue_buffer_);
1360 }
1361 }
1362
1363 if (submodules_.gain_control) {
1364 while (agc_render_signal_queue_->Remove(&agc_capture_queue_buffer_)) {
1365 submodules_.gain_control->ProcessRenderAudio(agc_capture_queue_buffer_);
1366 }
1367 }
1368
1369 if (submodules_.echo_detector) {
1370 while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) {
1371 submodules_.echo_detector->AnalyzeRenderAudio(red_capture_queue_buffer_);
1372 }
1373 }
1374 }
1375
ProcessStream(const int16_t * const src,const StreamConfig & input_config,const StreamConfig & output_config,int16_t * const dest)1376 int AudioProcessingImpl::ProcessStream(const int16_t* const src,
1377 const StreamConfig& input_config,
1378 const StreamConfig& output_config,
1379 int16_t* const dest) {
1380 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
1381
1382 RETURN_ON_ERR(
1383 HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
1384 MaybeInitializeCapture(input_config, output_config);
1385
1386 MutexLock lock_capture(&mutex_capture_);
1387 DenormalDisabler denormal_disabler(use_denormal_disabler_);
1388
1389 if (aec_dump_) {
1390 RecordUnprocessedCaptureStream(src, input_config);
1391 }
1392
1393 capture_.capture_audio->CopyFrom(src, input_config);
1394 if (capture_.capture_fullband_audio) {
1395 capture_.capture_fullband_audio->CopyFrom(src, input_config);
1396 }
1397 RETURN_ON_ERR(ProcessCaptureStreamLocked());
1398 if (submodule_states_.CaptureMultiBandProcessingPresent() ||
1399 submodule_states_.CaptureFullBandProcessingActive()) {
1400 if (capture_.capture_fullband_audio) {
1401 capture_.capture_fullband_audio->CopyTo(output_config, dest);
1402 } else {
1403 capture_.capture_audio->CopyTo(output_config, dest);
1404 }
1405 }
1406
1407 if (aec_dump_) {
1408 RecordProcessedCaptureStream(dest, output_config);
1409 }
1410 return kNoError;
1411 }
1412
ProcessCaptureStreamLocked()1413 int AudioProcessingImpl::ProcessCaptureStreamLocked() {
1414 EmptyQueuedRenderAudioLocked();
1415 HandleCaptureRuntimeSettings();
1416 DenormalDisabler denormal_disabler(use_denormal_disabler_);
1417
1418 // Ensure that not both the AEC and AECM are active at the same time.
1419 // TODO(peah): Simplify once the public API Enable functions for these
1420 // are moved to APM.
1421 RTC_DCHECK_LE(
1422 !!submodules_.echo_controller + !!submodules_.echo_control_mobile, 1);
1423
1424 data_dumper_->DumpRaw(
1425 "applied_input_volume",
1426 capture_.applied_input_volume.value_or(kUnspecifiedDataDumpInputVolume));
1427
1428 AudioBuffer* capture_buffer = capture_.capture_audio.get(); // For brevity.
1429 AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get();
1430
1431 if (submodules_.high_pass_filter &&
1432 config_.high_pass_filter.apply_in_full_band &&
1433 !constants_.enforce_split_band_hpf) {
1434 submodules_.high_pass_filter->Process(capture_buffer,
1435 /*use_split_band_data=*/false);
1436 }
1437
1438 if (submodules_.capture_levels_adjuster) {
1439 if (config_.capture_level_adjustment.analog_mic_gain_emulation.enabled) {
1440 // When the input volume is emulated, retrieve the volume applied to the
1441 // input audio and notify that to APM so that the volume is passed to the
1442 // active AGC.
1443 set_stream_analog_level_locked(
1444 submodules_.capture_levels_adjuster->GetAnalogMicGainLevel());
1445 }
1446 submodules_.capture_levels_adjuster->ApplyPreLevelAdjustment(
1447 *capture_buffer);
1448 }
1449
1450 capture_input_rms_.Analyze(rtc::ArrayView<const float>(
1451 capture_buffer->channels_const()[0],
1452 capture_nonlocked_.capture_processing_format.num_frames()));
1453 const bool log_rms = ++capture_rms_interval_counter_ >= 1000;
1454 if (log_rms) {
1455 capture_rms_interval_counter_ = 0;
1456 RmsLevel::Levels levels = capture_input_rms_.AverageAndPeak();
1457 RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelAverageRms",
1458 levels.average, 1, RmsLevel::kMinLevelDb, 64);
1459 RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelPeakRms",
1460 levels.peak, 1, RmsLevel::kMinLevelDb, 64);
1461 }
1462
1463 if (capture_.applied_input_volume.has_value()) {
1464 applied_input_volume_stats_reporter_.UpdateStatistics(
1465 *capture_.applied_input_volume);
1466 }
1467
1468 if (submodules_.echo_controller) {
1469 // Determine if the echo path gain has changed by checking all the gains
1470 // applied before AEC.
1471 capture_.echo_path_gain_change = capture_.applied_input_volume_changed;
1472
1473 // Detect and flag any change in the capture level adjustment pre-gain.
1474 if (submodules_.capture_levels_adjuster) {
1475 float pre_adjustment_gain =
1476 submodules_.capture_levels_adjuster->GetPreAdjustmentGain();
1477 capture_.echo_path_gain_change =
1478 capture_.echo_path_gain_change ||
1479 (capture_.prev_pre_adjustment_gain != pre_adjustment_gain &&
1480 capture_.prev_pre_adjustment_gain >= 0.0f);
1481 capture_.prev_pre_adjustment_gain = pre_adjustment_gain;
1482 }
1483
1484 // Detect volume change.
1485 capture_.echo_path_gain_change =
1486 capture_.echo_path_gain_change ||
1487 (capture_.prev_playout_volume != capture_.playout_volume &&
1488 capture_.prev_playout_volume >= 0);
1489 capture_.prev_playout_volume = capture_.playout_volume;
1490
1491 submodules_.echo_controller->AnalyzeCapture(capture_buffer);
1492 }
1493
1494 if (submodules_.agc_manager) {
1495 submodules_.agc_manager->AnalyzePreProcess(*capture_buffer);
1496 }
1497
1498 if (submodules_.gain_controller2 &&
1499 config_.gain_controller2.input_volume_controller.enabled) {
1500 // Expect the volume to be available if the input controller is enabled.
1501 RTC_DCHECK(capture_.applied_input_volume.has_value());
1502 if (capture_.applied_input_volume.has_value()) {
1503 submodules_.gain_controller2->Analyze(*capture_.applied_input_volume,
1504 *capture_buffer);
1505 }
1506 }
1507
1508 if (submodule_states_.CaptureMultiBandSubModulesActive() &&
1509 SampleRateSupportsMultiBand(
1510 capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
1511 capture_buffer->SplitIntoFrequencyBands();
1512 }
1513
1514 const bool multi_channel_capture = config_.pipeline.multi_channel_capture &&
1515 constants_.multi_channel_capture_support;
1516 if (submodules_.echo_controller && !multi_channel_capture) {
1517 // Force down-mixing of the number of channels after the detection of
1518 // capture signal saturation.
1519 // TODO(peah): Look into ensuring that this kind of tampering with the
1520 // AudioBuffer functionality should not be needed.
1521 capture_buffer->set_num_channels(1);
1522 }
1523
1524 if (submodules_.high_pass_filter &&
1525 (!config_.high_pass_filter.apply_in_full_band ||
1526 constants_.enforce_split_band_hpf)) {
1527 submodules_.high_pass_filter->Process(capture_buffer,
1528 /*use_split_band_data=*/true);
1529 }
1530
1531 if (submodules_.gain_control) {
1532 RETURN_ON_ERR(
1533 submodules_.gain_control->AnalyzeCaptureAudio(*capture_buffer));
1534 }
1535
1536 if ((!config_.noise_suppression.analyze_linear_aec_output_when_available ||
1537 !linear_aec_buffer || submodules_.echo_control_mobile) &&
1538 submodules_.noise_suppressor) {
1539 submodules_.noise_suppressor->Analyze(*capture_buffer);
1540 }
1541
1542 if (submodules_.echo_control_mobile) {
1543 // Ensure that the stream delay was set before the call to the
1544 // AECM ProcessCaptureAudio function.
1545 if (!capture_.was_stream_delay_set) {
1546 return AudioProcessing::kStreamParameterNotSetError;
1547 }
1548
1549 if (submodules_.noise_suppressor) {
1550 submodules_.noise_suppressor->Process(capture_buffer);
1551 }
1552
1553 RETURN_ON_ERR(submodules_.echo_control_mobile->ProcessCaptureAudio(
1554 capture_buffer, stream_delay_ms()));
1555 } else {
1556 if (submodules_.echo_controller) {
1557 data_dumper_->DumpRaw("stream_delay", stream_delay_ms());
1558
1559 if (capture_.was_stream_delay_set) {
1560 submodules_.echo_controller->SetAudioBufferDelay(stream_delay_ms());
1561 }
1562
1563 submodules_.echo_controller->ProcessCapture(
1564 capture_buffer, linear_aec_buffer, capture_.echo_path_gain_change);
1565 }
1566
1567 if (config_.noise_suppression.analyze_linear_aec_output_when_available &&
1568 linear_aec_buffer && submodules_.noise_suppressor) {
1569 submodules_.noise_suppressor->Analyze(*linear_aec_buffer);
1570 }
1571
1572 if (submodules_.noise_suppressor) {
1573 submodules_.noise_suppressor->Process(capture_buffer);
1574 }
1575 }
1576
1577 if (submodules_.agc_manager) {
1578 submodules_.agc_manager->Process(*capture_buffer);
1579
1580 absl::optional<int> new_digital_gain =
1581 submodules_.agc_manager->GetDigitalComressionGain();
1582 if (new_digital_gain && submodules_.gain_control) {
1583 submodules_.gain_control->set_compression_gain_db(*new_digital_gain);
1584 }
1585 }
1586
1587 if (submodules_.gain_control) {
1588 // TODO(peah): Add reporting from AEC3 whether there is echo.
1589 RETURN_ON_ERR(submodules_.gain_control->ProcessCaptureAudio(
1590 capture_buffer, /*stream_has_echo*/ false));
1591 }
1592
1593 if (submodule_states_.CaptureMultiBandProcessingPresent() &&
1594 SampleRateSupportsMultiBand(
1595 capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
1596 capture_buffer->MergeFrequencyBands();
1597 }
1598
1599 if (capture_.capture_output_used) {
1600 if (capture_.capture_fullband_audio) {
1601 const auto& ec = submodules_.echo_controller;
1602 bool ec_active = ec ? ec->ActiveProcessing() : false;
1603 // Only update the fullband buffer if the multiband processing has changed
1604 // the signal. Keep the original signal otherwise.
1605 if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
1606 capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
1607 }
1608 capture_buffer = capture_.capture_fullband_audio.get();
1609 }
1610
1611 if (submodules_.echo_detector) {
1612 submodules_.echo_detector->AnalyzeCaptureAudio(
1613 rtc::ArrayView<const float>(capture_buffer->channels()[0],
1614 capture_buffer->num_frames()));
1615 }
1616
1617 absl::optional<float> voice_probability;
1618 if (!!submodules_.voice_activity_detector) {
1619 voice_probability = submodules_.voice_activity_detector->Analyze(
1620 AudioFrameView<const float>(capture_buffer->channels(),
1621 capture_buffer->num_channels(),
1622 capture_buffer->num_frames()));
1623 }
1624
1625 if (submodules_.transient_suppressor) {
1626 float transient_suppressor_voice_probability = 1.0f;
1627 switch (transient_suppressor_vad_mode_) {
1628 case TransientSuppressor::VadMode::kDefault:
1629 if (submodules_.agc_manager) {
1630 transient_suppressor_voice_probability =
1631 submodules_.agc_manager->voice_probability();
1632 }
1633 break;
1634 case TransientSuppressor::VadMode::kRnnVad:
1635 RTC_DCHECK(voice_probability.has_value());
1636 transient_suppressor_voice_probability = *voice_probability;
1637 break;
1638 case TransientSuppressor::VadMode::kNoVad:
1639 // The transient suppressor will ignore `voice_probability`.
1640 break;
1641 }
1642 float delayed_voice_probability =
1643 submodules_.transient_suppressor->Suppress(
1644 capture_buffer->channels()[0], capture_buffer->num_frames(),
1645 capture_buffer->num_channels(),
1646 capture_buffer->split_bands_const(0)[kBand0To8kHz],
1647 capture_buffer->num_frames_per_band(),
1648 /*reference_data=*/nullptr, /*reference_length=*/0,
1649 transient_suppressor_voice_probability, capture_.key_pressed);
1650 if (voice_probability.has_value()) {
1651 *voice_probability = delayed_voice_probability;
1652 }
1653 }
1654
1655 // Experimental APM sub-module that analyzes `capture_buffer`.
1656 if (submodules_.capture_analyzer) {
1657 submodules_.capture_analyzer->Analyze(capture_buffer);
1658 }
1659
1660 if (submodules_.gain_controller2) {
1661 // TODO(bugs.webrtc.org/7494): Let AGC2 detect applied input volume
1662 // changes.
1663 submodules_.gain_controller2->Process(
1664 voice_probability, capture_.applied_input_volume_changed,
1665 capture_buffer);
1666 }
1667
1668 if (submodules_.capture_post_processor) {
1669 submodules_.capture_post_processor->Process(capture_buffer);
1670 }
1671
1672 capture_output_rms_.Analyze(rtc::ArrayView<const float>(
1673 capture_buffer->channels_const()[0],
1674 capture_nonlocked_.capture_processing_format.num_frames()));
1675 if (log_rms) {
1676 RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
1677 RTC_HISTOGRAM_COUNTS_LINEAR(
1678 "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1,
1679 RmsLevel::kMinLevelDb, 64);
1680 RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
1681 levels.peak, 1, RmsLevel::kMinLevelDb, 64);
1682 }
1683
1684 // Compute echo-detector stats.
1685 if (submodules_.echo_detector) {
1686 auto ed_metrics = submodules_.echo_detector->GetMetrics();
1687 capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
1688 capture_.stats.residual_echo_likelihood_recent_max =
1689 ed_metrics.echo_likelihood_recent_max;
1690 }
1691 }
1692
1693 // Compute echo-controller stats.
1694 if (submodules_.echo_controller) {
1695 auto ec_metrics = submodules_.echo_controller->GetMetrics();
1696 capture_.stats.echo_return_loss = ec_metrics.echo_return_loss;
1697 capture_.stats.echo_return_loss_enhancement =
1698 ec_metrics.echo_return_loss_enhancement;
1699 capture_.stats.delay_ms = ec_metrics.delay_ms;
1700 }
1701
1702 // Pass stats for reporting.
1703 stats_reporter_.UpdateStatistics(capture_.stats);
1704
1705 UpdateRecommendedInputVolumeLocked();
1706 if (capture_.recommended_input_volume.has_value()) {
1707 recommended_input_volume_stats_reporter_.UpdateStatistics(
1708 *capture_.recommended_input_volume);
1709 }
1710
1711 if (submodules_.capture_levels_adjuster) {
1712 submodules_.capture_levels_adjuster->ApplyPostLevelAdjustment(
1713 *capture_buffer);
1714
1715 if (config_.capture_level_adjustment.analog_mic_gain_emulation.enabled) {
1716 // If the input volume emulation is used, retrieve the recommended input
1717 // volume and set that to emulate the input volume on the next processed
1718 // audio frame.
1719 RTC_DCHECK(capture_.recommended_input_volume.has_value());
1720 submodules_.capture_levels_adjuster->SetAnalogMicGainLevel(
1721 *capture_.recommended_input_volume);
1722 }
1723 }
1724
1725 // Temporarily set the output to zero after the stream has been unmuted
1726 // (capture output is again used). The purpose of this is to avoid clicks and
1727 // artefacts in the audio that results when the processing again is
1728 // reactivated after unmuting.
1729 if (!capture_.capture_output_used_last_frame &&
1730 capture_.capture_output_used) {
1731 for (size_t ch = 0; ch < capture_buffer->num_channels(); ++ch) {
1732 rtc::ArrayView<float> channel_view(capture_buffer->channels()[ch],
1733 capture_buffer->num_frames());
1734 std::fill(channel_view.begin(), channel_view.end(), 0.f);
1735 }
1736 }
1737 capture_.capture_output_used_last_frame = capture_.capture_output_used;
1738
1739 capture_.was_stream_delay_set = false;
1740
1741 data_dumper_->DumpRaw("recommended_input_volume",
1742 capture_.recommended_input_volume.value_or(
1743 kUnspecifiedDataDumpInputVolume));
1744
1745 return kNoError;
1746 }
1747
AnalyzeReverseStream(const float * const * data,const StreamConfig & reverse_config)1748 int AudioProcessingImpl::AnalyzeReverseStream(
1749 const float* const* data,
1750 const StreamConfig& reverse_config) {
1751 TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_StreamConfig");
1752 MutexLock lock(&mutex_render_);
1753 DenormalDisabler denormal_disabler(use_denormal_disabler_);
1754 RTC_DCHECK(data);
1755 for (size_t i = 0; i < reverse_config.num_channels(); ++i) {
1756 RTC_DCHECK(data[i]);
1757 }
1758 RETURN_ON_ERR(
1759 AudioFormatValidityToErrorCode(ValidateAudioFormat(reverse_config)));
1760
1761 MaybeInitializeRender(reverse_config, reverse_config);
1762 return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config);
1763 }
1764
ProcessReverseStream(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config,float * const * dest)1765 int AudioProcessingImpl::ProcessReverseStream(const float* const* src,
1766 const StreamConfig& input_config,
1767 const StreamConfig& output_config,
1768 float* const* dest) {
1769 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
1770 MutexLock lock(&mutex_render_);
1771 DenormalDisabler denormal_disabler(use_denormal_disabler_);
1772 RETURN_ON_ERR(
1773 HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
1774
1775 MaybeInitializeRender(input_config, output_config);
1776
1777 RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config));
1778
1779 if (submodule_states_.RenderMultiBandProcessingActive() ||
1780 submodule_states_.RenderFullBandProcessingActive()) {
1781 render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
1782 dest);
1783 } else if (formats_.api_format.reverse_input_stream() !=
1784 formats_.api_format.reverse_output_stream()) {
1785 render_.render_converter->Convert(src, input_config.num_samples(), dest,
1786 output_config.num_samples());
1787 } else {
1788 CopyAudioIfNeeded(src, input_config.num_frames(),
1789 input_config.num_channels(), dest);
1790 }
1791
1792 return kNoError;
1793 }
1794
AnalyzeReverseStreamLocked(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config)1795 int AudioProcessingImpl::AnalyzeReverseStreamLocked(
1796 const float* const* src,
1797 const StreamConfig& input_config,
1798 const StreamConfig& output_config) {
1799 if (aec_dump_) {
1800 const size_t channel_size =
1801 formats_.api_format.reverse_input_stream().num_frames();
1802 const size_t num_channels =
1803 formats_.api_format.reverse_input_stream().num_channels();
1804 aec_dump_->WriteRenderStreamMessage(
1805 AudioFrameView<const float>(src, num_channels, channel_size));
1806 }
1807 render_.render_audio->CopyFrom(src,
1808 formats_.api_format.reverse_input_stream());
1809 return ProcessRenderStreamLocked();
1810 }
1811
ProcessReverseStream(const int16_t * const src,const StreamConfig & input_config,const StreamConfig & output_config,int16_t * const dest)1812 int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
1813 const StreamConfig& input_config,
1814 const StreamConfig& output_config,
1815 int16_t* const dest) {
1816 TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
1817
1818 MutexLock lock(&mutex_render_);
1819 DenormalDisabler denormal_disabler(use_denormal_disabler_);
1820
1821 RETURN_ON_ERR(
1822 HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
1823 MaybeInitializeRender(input_config, output_config);
1824
1825 if (aec_dump_) {
1826 aec_dump_->WriteRenderStreamMessage(src, input_config.num_frames(),
1827 input_config.num_channels());
1828 }
1829
1830 render_.render_audio->CopyFrom(src, input_config);
1831 RETURN_ON_ERR(ProcessRenderStreamLocked());
1832 if (submodule_states_.RenderMultiBandProcessingActive() ||
1833 submodule_states_.RenderFullBandProcessingActive()) {
1834 render_.render_audio->CopyTo(output_config, dest);
1835 }
1836 return kNoError;
1837 }
1838
ProcessRenderStreamLocked()1839 int AudioProcessingImpl::ProcessRenderStreamLocked() {
1840 AudioBuffer* render_buffer = render_.render_audio.get(); // For brevity.
1841
1842 HandleRenderRuntimeSettings();
1843 DenormalDisabler denormal_disabler(use_denormal_disabler_);
1844
1845 if (submodules_.render_pre_processor) {
1846 submodules_.render_pre_processor->Process(render_buffer);
1847 }
1848
1849 QueueNonbandedRenderAudio(render_buffer);
1850
1851 if (submodule_states_.RenderMultiBandSubModulesActive() &&
1852 SampleRateSupportsMultiBand(
1853 formats_.render_processing_format.sample_rate_hz())) {
1854 render_buffer->SplitIntoFrequencyBands();
1855 }
1856
1857 if (submodule_states_.RenderMultiBandSubModulesActive()) {
1858 QueueBandedRenderAudio(render_buffer);
1859 }
1860
1861 // TODO(peah): Perform the queuing inside QueueRenderAudiuo().
1862 if (submodules_.echo_controller) {
1863 submodules_.echo_controller->AnalyzeRender(render_buffer);
1864 }
1865
1866 if (submodule_states_.RenderMultiBandProcessingActive() &&
1867 SampleRateSupportsMultiBand(
1868 formats_.render_processing_format.sample_rate_hz())) {
1869 render_buffer->MergeFrequencyBands();
1870 }
1871
1872 return kNoError;
1873 }
1874
set_stream_delay_ms(int delay)1875 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
1876 MutexLock lock(&mutex_capture_);
1877 Error retval = kNoError;
1878 capture_.was_stream_delay_set = true;
1879
1880 if (delay < 0) {
1881 delay = 0;
1882 retval = kBadStreamParameterWarning;
1883 }
1884
1885 // TODO(ajm): the max is rather arbitrarily chosen; investigate.
1886 if (delay > 500) {
1887 delay = 500;
1888 retval = kBadStreamParameterWarning;
1889 }
1890
1891 capture_nonlocked_.stream_delay_ms = delay;
1892 return retval;
1893 }
1894
GetLinearAecOutput(rtc::ArrayView<std::array<float,160>> linear_output) const1895 bool AudioProcessingImpl::GetLinearAecOutput(
1896 rtc::ArrayView<std::array<float, 160>> linear_output) const {
1897 MutexLock lock(&mutex_capture_);
1898 AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get();
1899
1900 RTC_DCHECK(linear_aec_buffer);
1901 if (linear_aec_buffer) {
1902 RTC_DCHECK_EQ(1, linear_aec_buffer->num_bands());
1903 RTC_DCHECK_EQ(linear_output.size(), linear_aec_buffer->num_channels());
1904
1905 for (size_t ch = 0; ch < linear_aec_buffer->num_channels(); ++ch) {
1906 RTC_DCHECK_EQ(linear_output[ch].size(), linear_aec_buffer->num_frames());
1907 rtc::ArrayView<const float> channel_view =
1908 rtc::ArrayView<const float>(linear_aec_buffer->channels_const()[ch],
1909 linear_aec_buffer->num_frames());
1910 FloatS16ToFloat(channel_view.data(), channel_view.size(),
1911 linear_output[ch].data());
1912 }
1913 return true;
1914 }
1915 RTC_LOG(LS_ERROR) << "No linear AEC output available";
1916 RTC_DCHECK_NOTREACHED();
1917 return false;
1918 }
1919
stream_delay_ms() const1920 int AudioProcessingImpl::stream_delay_ms() const {
1921 // Used as callback from submodules, hence locking is not allowed.
1922 return capture_nonlocked_.stream_delay_ms;
1923 }
1924
set_stream_key_pressed(bool key_pressed)1925 void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
1926 MutexLock lock(&mutex_capture_);
1927 capture_.key_pressed = key_pressed;
1928 }
1929
set_stream_analog_level(int level)1930 void AudioProcessingImpl::set_stream_analog_level(int level) {
1931 MutexLock lock_capture(&mutex_capture_);
1932 set_stream_analog_level_locked(level);
1933 }
1934
set_stream_analog_level_locked(int level)1935 void AudioProcessingImpl::set_stream_analog_level_locked(int level) {
1936 capture_.applied_input_volume_changed =
1937 capture_.applied_input_volume.has_value() &&
1938 *capture_.applied_input_volume != level;
1939 capture_.applied_input_volume = level;
1940
1941 // Invalidate any previously recommended input volume which will be updated by
1942 // `ProcessStream()`.
1943 capture_.recommended_input_volume = absl::nullopt;
1944
1945 if (submodules_.agc_manager) {
1946 submodules_.agc_manager->set_stream_analog_level(level);
1947 return;
1948 }
1949
1950 if (submodules_.gain_control) {
1951 int error = submodules_.gain_control->set_stream_analog_level(level);
1952 RTC_DCHECK_EQ(kNoError, error);
1953 return;
1954 }
1955 }
1956
recommended_stream_analog_level() const1957 int AudioProcessingImpl::recommended_stream_analog_level() const {
1958 MutexLock lock_capture(&mutex_capture_);
1959 if (!capture_.applied_input_volume.has_value()) {
1960 RTC_LOG(LS_ERROR) << "set_stream_analog_level has not been called";
1961 }
1962 // Input volume to recommend when `set_stream_analog_level()` is not called.
1963 constexpr int kFallBackInputVolume = 255;
1964 // When APM has no input volume to recommend, return the latest applied input
1965 // volume that has been observed in order to possibly produce no input volume
1966 // change. If no applied input volume has been observed, return a fall-back
1967 // value.
1968 return capture_.recommended_input_volume.value_or(
1969 capture_.applied_input_volume.value_or(kFallBackInputVolume));
1970 }
1971
UpdateRecommendedInputVolumeLocked()1972 void AudioProcessingImpl::UpdateRecommendedInputVolumeLocked() {
1973 if (!capture_.applied_input_volume.has_value()) {
1974 // When `set_stream_analog_level()` is not called, no input level can be
1975 // recommended.
1976 capture_.recommended_input_volume = absl::nullopt;
1977 return;
1978 }
1979
1980 if (submodules_.agc_manager) {
1981 capture_.recommended_input_volume =
1982 submodules_.agc_manager->recommended_analog_level();
1983 return;
1984 }
1985
1986 if (submodules_.gain_control) {
1987 capture_.recommended_input_volume =
1988 submodules_.gain_control->stream_analog_level();
1989 return;
1990 }
1991
1992 if (submodules_.gain_controller2 &&
1993 config_.gain_controller2.input_volume_controller.enabled) {
1994 capture_.recommended_input_volume =
1995 submodules_.gain_controller2->GetRecommendedInputVolume();
1996 return;
1997 }
1998
1999 capture_.recommended_input_volume = capture_.applied_input_volume;
2000 }
2001
CreateAndAttachAecDump(absl::string_view file_name,int64_t max_log_size_bytes,rtc::TaskQueue * worker_queue)2002 bool AudioProcessingImpl::CreateAndAttachAecDump(absl::string_view file_name,
2003 int64_t max_log_size_bytes,
2004 rtc::TaskQueue* worker_queue) {
2005 std::unique_ptr<AecDump> aec_dump =
2006 AecDumpFactory::Create(file_name, max_log_size_bytes, worker_queue);
2007 if (!aec_dump) {
2008 return false;
2009 }
2010
2011 AttachAecDump(std::move(aec_dump));
2012 return true;
2013 }
2014
CreateAndAttachAecDump(FILE * handle,int64_t max_log_size_bytes,rtc::TaskQueue * worker_queue)2015 bool AudioProcessingImpl::CreateAndAttachAecDump(FILE* handle,
2016 int64_t max_log_size_bytes,
2017 rtc::TaskQueue* worker_queue) {
2018 std::unique_ptr<AecDump> aec_dump =
2019 AecDumpFactory::Create(handle, max_log_size_bytes, worker_queue);
2020 if (!aec_dump) {
2021 return false;
2022 }
2023
2024 AttachAecDump(std::move(aec_dump));
2025 return true;
2026 }
2027
AttachAecDump(std::unique_ptr<AecDump> aec_dump)2028 void AudioProcessingImpl::AttachAecDump(std::unique_ptr<AecDump> aec_dump) {
2029 RTC_DCHECK(aec_dump);
2030 MutexLock lock_render(&mutex_render_);
2031 MutexLock lock_capture(&mutex_capture_);
2032
2033 // The previously attached AecDump will be destroyed with the
2034 // 'aec_dump' parameter, which is after locks are released.
2035 aec_dump_.swap(aec_dump);
2036 WriteAecDumpConfigMessage(true);
2037 aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis());
2038 }
2039
DetachAecDump()2040 void AudioProcessingImpl::DetachAecDump() {
2041 // The d-tor of a task-queue based AecDump blocks until all pending
2042 // tasks are done. This construction avoids blocking while holding
2043 // the render and capture locks.
2044 std::unique_ptr<AecDump> aec_dump = nullptr;
2045 {
2046 MutexLock lock_render(&mutex_render_);
2047 MutexLock lock_capture(&mutex_capture_);
2048 aec_dump = std::move(aec_dump_);
2049 }
2050 }
2051
GetConfig() const2052 AudioProcessing::Config AudioProcessingImpl::GetConfig() const {
2053 MutexLock lock_render(&mutex_render_);
2054 MutexLock lock_capture(&mutex_capture_);
2055 return config_;
2056 }
2057
UpdateActiveSubmoduleStates()2058 bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
2059 return submodule_states_.Update(
2060 config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile,
2061 !!submodules_.noise_suppressor, !!submodules_.gain_control,
2062 !!submodules_.gain_controller2, !!submodules_.voice_activity_detector,
2063 config_.pre_amplifier.enabled || config_.capture_level_adjustment.enabled,
2064 capture_nonlocked_.echo_controller_enabled,
2065 !!submodules_.transient_suppressor);
2066 }
2067
InitializeTransientSuppressor()2068 void AudioProcessingImpl::InitializeTransientSuppressor() {
2069 if (config_.transient_suppression.enabled &&
2070 !constants_.transient_suppressor_forced_off) {
2071 // Attempt to create a transient suppressor, if one is not already created.
2072 if (!submodules_.transient_suppressor) {
2073 submodules_.transient_suppressor = CreateTransientSuppressor(
2074 submodule_creation_overrides_, transient_suppressor_vad_mode_,
2075 proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate,
2076 num_proc_channels());
2077 if (!submodules_.transient_suppressor) {
2078 RTC_LOG(LS_WARNING)
2079 << "No transient suppressor created (probably disabled)";
2080 }
2081 } else {
2082 submodules_.transient_suppressor->Initialize(
2083 proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate,
2084 num_proc_channels());
2085 }
2086 } else {
2087 submodules_.transient_suppressor.reset();
2088 }
2089 }
2090
InitializeHighPassFilter(bool forced_reset)2091 void AudioProcessingImpl::InitializeHighPassFilter(bool forced_reset) {
2092 bool high_pass_filter_needed_by_aec =
2093 config_.echo_canceller.enabled &&
2094 config_.echo_canceller.enforce_high_pass_filtering &&
2095 !config_.echo_canceller.mobile_mode;
2096 if (submodule_states_.HighPassFilteringRequired() ||
2097 high_pass_filter_needed_by_aec) {
2098 bool use_full_band = config_.high_pass_filter.apply_in_full_band &&
2099 !constants_.enforce_split_band_hpf;
2100 int rate = use_full_band ? proc_fullband_sample_rate_hz()
2101 : proc_split_sample_rate_hz();
2102 size_t num_channels =
2103 use_full_band ? num_output_channels() : num_proc_channels();
2104
2105 if (!submodules_.high_pass_filter ||
2106 rate != submodules_.high_pass_filter->sample_rate_hz() ||
2107 forced_reset ||
2108 num_channels != submodules_.high_pass_filter->num_channels()) {
2109 submodules_.high_pass_filter.reset(
2110 new HighPassFilter(rate, num_channels));
2111 }
2112 } else {
2113 submodules_.high_pass_filter.reset();
2114 }
2115 }
2116
InitializeEchoController()2117 void AudioProcessingImpl::InitializeEchoController() {
2118 bool use_echo_controller =
2119 echo_control_factory_ ||
2120 (config_.echo_canceller.enabled && !config_.echo_canceller.mobile_mode);
2121
2122 if (use_echo_controller) {
2123 // Create and activate the echo controller.
2124 if (echo_control_factory_) {
2125 submodules_.echo_controller = echo_control_factory_->Create(
2126 proc_sample_rate_hz(), num_reverse_channels(), num_proc_channels());
2127 RTC_DCHECK(submodules_.echo_controller);
2128 } else {
2129 EchoCanceller3Config config;
2130 absl::optional<EchoCanceller3Config> multichannel_config;
2131 if (use_setup_specific_default_aec3_config_) {
2132 multichannel_config = EchoCanceller3::CreateDefaultMultichannelConfig();
2133 }
2134 submodules_.echo_controller = std::make_unique<EchoCanceller3>(
2135 config, multichannel_config, proc_sample_rate_hz(),
2136 num_reverse_channels(), num_proc_channels());
2137 }
2138
2139 // Setup the storage for returning the linear AEC output.
2140 if (config_.echo_canceller.export_linear_aec_output) {
2141 constexpr int kLinearOutputRateHz = 16000;
2142 capture_.linear_aec_output = std::make_unique<AudioBuffer>(
2143 kLinearOutputRateHz, num_proc_channels(), kLinearOutputRateHz,
2144 num_proc_channels(), kLinearOutputRateHz, num_proc_channels());
2145 } else {
2146 capture_.linear_aec_output.reset();
2147 }
2148
2149 capture_nonlocked_.echo_controller_enabled = true;
2150
2151 submodules_.echo_control_mobile.reset();
2152 aecm_render_signal_queue_.reset();
2153 return;
2154 }
2155
2156 submodules_.echo_controller.reset();
2157 capture_nonlocked_.echo_controller_enabled = false;
2158 capture_.linear_aec_output.reset();
2159
2160 if (!config_.echo_canceller.enabled) {
2161 submodules_.echo_control_mobile.reset();
2162 aecm_render_signal_queue_.reset();
2163 return;
2164 }
2165
2166 if (config_.echo_canceller.mobile_mode) {
2167 // Create and activate AECM.
2168 size_t max_element_size =
2169 std::max(static_cast<size_t>(1),
2170 kMaxAllowedValuesOfSamplesPerBand *
2171 EchoControlMobileImpl::NumCancellersRequired(
2172 num_output_channels(), num_reverse_channels()));
2173
2174 std::vector<int16_t> template_queue_element(max_element_size);
2175
2176 aecm_render_signal_queue_.reset(
2177 new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
2178 kMaxNumFramesToBuffer, template_queue_element,
2179 RenderQueueItemVerifier<int16_t>(max_element_size)));
2180
2181 aecm_render_queue_buffer_.resize(max_element_size);
2182 aecm_capture_queue_buffer_.resize(max_element_size);
2183
2184 submodules_.echo_control_mobile.reset(new EchoControlMobileImpl());
2185
2186 submodules_.echo_control_mobile->Initialize(proc_split_sample_rate_hz(),
2187 num_reverse_channels(),
2188 num_output_channels());
2189 return;
2190 }
2191
2192 submodules_.echo_control_mobile.reset();
2193 aecm_render_signal_queue_.reset();
2194 }
2195
InitializeGainController1()2196 void AudioProcessingImpl::InitializeGainController1() {
2197 if (config_.gain_controller2.enabled &&
2198 config_.gain_controller2.input_volume_controller.enabled &&
2199 config_.gain_controller1.enabled &&
2200 (config_.gain_controller1.mode ==
2201 AudioProcessing::Config::GainController1::kAdaptiveAnalog ||
2202 config_.gain_controller1.analog_gain_controller.enabled)) {
2203 RTC_LOG(LS_ERROR) << "APM configuration not valid: "
2204 << "Multiple input volume controllers enabled.";
2205 }
2206
2207 if (!config_.gain_controller1.enabled) {
2208 submodules_.agc_manager.reset();
2209 submodules_.gain_control.reset();
2210 return;
2211 }
2212
2213 RTC_HISTOGRAM_BOOLEAN(
2214 "WebRTC.Audio.GainController.Analog.Enabled",
2215 config_.gain_controller1.analog_gain_controller.enabled);
2216
2217 if (!submodules_.gain_control) {
2218 submodules_.gain_control.reset(new GainControlImpl());
2219 }
2220
2221 submodules_.gain_control->Initialize(num_proc_channels(),
2222 proc_sample_rate_hz());
2223 if (!config_.gain_controller1.analog_gain_controller.enabled) {
2224 int error = submodules_.gain_control->set_mode(
2225 Agc1ConfigModeToInterfaceMode(config_.gain_controller1.mode));
2226 RTC_DCHECK_EQ(kNoError, error);
2227 error = submodules_.gain_control->set_target_level_dbfs(
2228 config_.gain_controller1.target_level_dbfs);
2229 RTC_DCHECK_EQ(kNoError, error);
2230 error = submodules_.gain_control->set_compression_gain_db(
2231 config_.gain_controller1.compression_gain_db);
2232 RTC_DCHECK_EQ(kNoError, error);
2233 error = submodules_.gain_control->enable_limiter(
2234 config_.gain_controller1.enable_limiter);
2235 RTC_DCHECK_EQ(kNoError, error);
2236 constexpr int kAnalogLevelMinimum = 0;
2237 constexpr int kAnalogLevelMaximum = 255;
2238 error = submodules_.gain_control->set_analog_level_limits(
2239 kAnalogLevelMinimum, kAnalogLevelMaximum);
2240 RTC_DCHECK_EQ(kNoError, error);
2241
2242 submodules_.agc_manager.reset();
2243 return;
2244 }
2245
2246 if (!submodules_.agc_manager.get() ||
2247 submodules_.agc_manager->num_channels() !=
2248 static_cast<int>(num_proc_channels())) {
2249 int stream_analog_level = -1;
2250 const bool re_creation = !!submodules_.agc_manager;
2251 if (re_creation) {
2252 stream_analog_level = submodules_.agc_manager->recommended_analog_level();
2253 }
2254 submodules_.agc_manager.reset(new AgcManagerDirect(
2255 num_proc_channels(), config_.gain_controller1.analog_gain_controller));
2256 if (re_creation) {
2257 submodules_.agc_manager->set_stream_analog_level(stream_analog_level);
2258 }
2259 }
2260 submodules_.agc_manager->Initialize();
2261 submodules_.agc_manager->SetupDigitalGainControl(*submodules_.gain_control);
2262 submodules_.agc_manager->HandleCaptureOutputUsedChange(
2263 capture_.capture_output_used);
2264 }
2265
InitializeGainController2(bool config_has_changed)2266 void AudioProcessingImpl::InitializeGainController2(bool config_has_changed) {
2267 if (!config_has_changed) {
2268 return;
2269 }
2270 if (!config_.gain_controller2.enabled) {
2271 submodules_.gain_controller2.reset();
2272 return;
2273 }
2274 if (!submodules_.gain_controller2 || config_has_changed) {
2275 const bool use_internal_vad =
2276 transient_suppressor_vad_mode_ != TransientSuppressor::VadMode::kRnnVad;
2277 submodules_.gain_controller2 = std::make_unique<GainController2>(
2278 config_.gain_controller2,
2279 input_volume_controller_config_override_.value_or(
2280 InputVolumeController::Config{}),
2281 proc_fullband_sample_rate_hz(), num_input_channels(), use_internal_vad);
2282 submodules_.gain_controller2->SetCaptureOutputUsed(
2283 capture_.capture_output_used);
2284 }
2285 }
2286
InitializeVoiceActivityDetector(bool config_has_changed)2287 void AudioProcessingImpl::InitializeVoiceActivityDetector(
2288 bool config_has_changed) {
2289 if (!config_has_changed) {
2290 return;
2291 }
2292 const bool use_vad =
2293 transient_suppressor_vad_mode_ == TransientSuppressor::VadMode::kRnnVad &&
2294 config_.gain_controller2.enabled &&
2295 config_.gain_controller2.adaptive_digital.enabled;
2296 if (!use_vad) {
2297 submodules_.voice_activity_detector.reset();
2298 return;
2299 }
2300 if (!submodules_.voice_activity_detector || config_has_changed) {
2301 RTC_DCHECK(!!submodules_.gain_controller2);
2302 // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here.
2303 submodules_.voice_activity_detector =
2304 std::make_unique<VoiceActivityDetectorWrapper>(
2305 config_.gain_controller2.adaptive_digital.vad_reset_period_ms,
2306 submodules_.gain_controller2->GetCpuFeatures(),
2307 proc_fullband_sample_rate_hz());
2308 }
2309 }
2310
InitializeNoiseSuppressor()2311 void AudioProcessingImpl::InitializeNoiseSuppressor() {
2312 submodules_.noise_suppressor.reset();
2313
2314 if (config_.noise_suppression.enabled) {
2315 auto map_level =
2316 [](AudioProcessing::Config::NoiseSuppression::Level level) {
2317 using NoiseSuppresionConfig =
2318 AudioProcessing::Config::NoiseSuppression;
2319 switch (level) {
2320 case NoiseSuppresionConfig::kLow:
2321 return NsConfig::SuppressionLevel::k6dB;
2322 case NoiseSuppresionConfig::kModerate:
2323 return NsConfig::SuppressionLevel::k12dB;
2324 case NoiseSuppresionConfig::kHigh:
2325 return NsConfig::SuppressionLevel::k18dB;
2326 case NoiseSuppresionConfig::kVeryHigh:
2327 return NsConfig::SuppressionLevel::k21dB;
2328 }
2329 RTC_CHECK_NOTREACHED();
2330 };
2331
2332 NsConfig cfg;
2333 cfg.target_level = map_level(config_.noise_suppression.level);
2334 submodules_.noise_suppressor = std::make_unique<NoiseSuppressor>(
2335 cfg, proc_sample_rate_hz(), num_proc_channels());
2336 }
2337 }
2338
InitializeCaptureLevelsAdjuster()2339 void AudioProcessingImpl::InitializeCaptureLevelsAdjuster() {
2340 if (config_.pre_amplifier.enabled ||
2341 config_.capture_level_adjustment.enabled) {
2342 // Use both the pre-amplifier and the capture level adjustment gains as
2343 // pre-gains.
2344 float pre_gain = 1.f;
2345 if (config_.pre_amplifier.enabled) {
2346 pre_gain *= config_.pre_amplifier.fixed_gain_factor;
2347 }
2348 if (config_.capture_level_adjustment.enabled) {
2349 pre_gain *= config_.capture_level_adjustment.pre_gain_factor;
2350 }
2351
2352 submodules_.capture_levels_adjuster =
2353 std::make_unique<CaptureLevelsAdjuster>(
2354 config_.capture_level_adjustment.analog_mic_gain_emulation.enabled,
2355 config_.capture_level_adjustment.analog_mic_gain_emulation
2356 .initial_level,
2357 pre_gain, config_.capture_level_adjustment.post_gain_factor);
2358 } else {
2359 submodules_.capture_levels_adjuster.reset();
2360 }
2361 }
2362
InitializeResidualEchoDetector()2363 void AudioProcessingImpl::InitializeResidualEchoDetector() {
2364 if (submodules_.echo_detector) {
2365 submodules_.echo_detector->Initialize(
2366 proc_fullband_sample_rate_hz(), 1,
2367 formats_.render_processing_format.sample_rate_hz(), 1);
2368 }
2369 }
2370
InitializeAnalyzer()2371 void AudioProcessingImpl::InitializeAnalyzer() {
2372 if (submodules_.capture_analyzer) {
2373 submodules_.capture_analyzer->Initialize(proc_fullband_sample_rate_hz(),
2374 num_proc_channels());
2375 }
2376 }
2377
InitializePostProcessor()2378 void AudioProcessingImpl::InitializePostProcessor() {
2379 if (submodules_.capture_post_processor) {
2380 submodules_.capture_post_processor->Initialize(
2381 proc_fullband_sample_rate_hz(), num_proc_channels());
2382 }
2383 }
2384
InitializePreProcessor()2385 void AudioProcessingImpl::InitializePreProcessor() {
2386 if (submodules_.render_pre_processor) {
2387 submodules_.render_pre_processor->Initialize(
2388 formats_.render_processing_format.sample_rate_hz(),
2389 formats_.render_processing_format.num_channels());
2390 }
2391 }
2392
WriteAecDumpConfigMessage(bool forced)2393 void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) {
2394 if (!aec_dump_) {
2395 return;
2396 }
2397
2398 std::string experiments_description = "";
2399 // TODO(peah): Add semicolon-separated concatenations of experiment
2400 // descriptions for other submodules.
2401 if (!!submodules_.capture_post_processor) {
2402 experiments_description += "CapturePostProcessor;";
2403 }
2404 if (!!submodules_.render_pre_processor) {
2405 experiments_description += "RenderPreProcessor;";
2406 }
2407 if (capture_nonlocked_.echo_controller_enabled) {
2408 experiments_description += "EchoController;";
2409 }
2410 if (config_.gain_controller2.enabled) {
2411 experiments_description += "GainController2;";
2412 }
2413
2414 InternalAPMConfig apm_config;
2415
2416 apm_config.aec_enabled = config_.echo_canceller.enabled;
2417 apm_config.aec_delay_agnostic_enabled = false;
2418 apm_config.aec_extended_filter_enabled = false;
2419 apm_config.aec_suppression_level = 0;
2420
2421 apm_config.aecm_enabled = !!submodules_.echo_control_mobile;
2422 apm_config.aecm_comfort_noise_enabled =
2423 submodules_.echo_control_mobile &&
2424 submodules_.echo_control_mobile->is_comfort_noise_enabled();
2425 apm_config.aecm_routing_mode =
2426 submodules_.echo_control_mobile
2427 ? static_cast<int>(submodules_.echo_control_mobile->routing_mode())
2428 : 0;
2429
2430 apm_config.agc_enabled = !!submodules_.gain_control;
2431
2432 apm_config.agc_mode = submodules_.gain_control
2433 ? static_cast<int>(submodules_.gain_control->mode())
2434 : GainControl::kAdaptiveAnalog;
2435 apm_config.agc_limiter_enabled =
2436 submodules_.gain_control ? submodules_.gain_control->is_limiter_enabled()
2437 : false;
2438 apm_config.noise_robust_agc_enabled = !!submodules_.agc_manager;
2439
2440 apm_config.hpf_enabled = config_.high_pass_filter.enabled;
2441
2442 apm_config.ns_enabled = config_.noise_suppression.enabled;
2443 apm_config.ns_level = static_cast<int>(config_.noise_suppression.level);
2444
2445 apm_config.transient_suppression_enabled =
2446 config_.transient_suppression.enabled;
2447 apm_config.experiments_description = experiments_description;
2448 apm_config.pre_amplifier_enabled = config_.pre_amplifier.enabled;
2449 apm_config.pre_amplifier_fixed_gain_factor =
2450 config_.pre_amplifier.fixed_gain_factor;
2451
2452 if (!forced && apm_config == apm_config_for_aec_dump_) {
2453 return;
2454 }
2455 aec_dump_->WriteConfig(apm_config);
2456 apm_config_for_aec_dump_ = apm_config;
2457 }
2458
RecordUnprocessedCaptureStream(const float * const * src)2459 void AudioProcessingImpl::RecordUnprocessedCaptureStream(
2460 const float* const* src) {
2461 RTC_DCHECK(aec_dump_);
2462 WriteAecDumpConfigMessage(false);
2463
2464 const size_t channel_size = formats_.api_format.input_stream().num_frames();
2465 const size_t num_channels = formats_.api_format.input_stream().num_channels();
2466 aec_dump_->AddCaptureStreamInput(
2467 AudioFrameView<const float>(src, num_channels, channel_size));
2468 RecordAudioProcessingState();
2469 }
2470
RecordUnprocessedCaptureStream(const int16_t * const data,const StreamConfig & config)2471 void AudioProcessingImpl::RecordUnprocessedCaptureStream(
2472 const int16_t* const data,
2473 const StreamConfig& config) {
2474 RTC_DCHECK(aec_dump_);
2475 WriteAecDumpConfigMessage(false);
2476
2477 aec_dump_->AddCaptureStreamInput(data, config.num_channels(),
2478 config.num_frames());
2479 RecordAudioProcessingState();
2480 }
2481
RecordProcessedCaptureStream(const float * const * processed_capture_stream)2482 void AudioProcessingImpl::RecordProcessedCaptureStream(
2483 const float* const* processed_capture_stream) {
2484 RTC_DCHECK(aec_dump_);
2485
2486 const size_t channel_size = formats_.api_format.output_stream().num_frames();
2487 const size_t num_channels =
2488 formats_.api_format.output_stream().num_channels();
2489 aec_dump_->AddCaptureStreamOutput(AudioFrameView<const float>(
2490 processed_capture_stream, num_channels, channel_size));
2491 aec_dump_->WriteCaptureStreamMessage();
2492 }
2493
RecordProcessedCaptureStream(const int16_t * const data,const StreamConfig & config)2494 void AudioProcessingImpl::RecordProcessedCaptureStream(
2495 const int16_t* const data,
2496 const StreamConfig& config) {
2497 RTC_DCHECK(aec_dump_);
2498
2499 aec_dump_->AddCaptureStreamOutput(data, config.num_channels(),
2500 config.num_frames());
2501 aec_dump_->WriteCaptureStreamMessage();
2502 }
2503
RecordAudioProcessingState()2504 void AudioProcessingImpl::RecordAudioProcessingState() {
2505 RTC_DCHECK(aec_dump_);
2506 AecDump::AudioProcessingState audio_proc_state;
2507 audio_proc_state.delay = capture_nonlocked_.stream_delay_ms;
2508 audio_proc_state.drift = 0;
2509 audio_proc_state.applied_input_volume = capture_.applied_input_volume;
2510 audio_proc_state.keypress = capture_.key_pressed;
2511 aec_dump_->AddAudioProcessingState(audio_proc_state);
2512 }
2513
ApmCaptureState()2514 AudioProcessingImpl::ApmCaptureState::ApmCaptureState()
2515 : was_stream_delay_set(false),
2516 capture_output_used(true),
2517 capture_output_used_last_frame(true),
2518 key_pressed(false),
2519 capture_processing_format(kSampleRate16kHz),
2520 split_rate(kSampleRate16kHz),
2521 echo_path_gain_change(false),
2522 prev_pre_adjustment_gain(-1.0f),
2523 playout_volume(-1),
2524 prev_playout_volume(-1),
2525 applied_input_volume_changed(false) {}
2526
// Defaulted destructor of the capture state, defined out of line in this
// file.
AudioProcessingImpl::ApmCaptureState::~ApmCaptureState() = default;

// Defaulted constructor of the render state, defined out of line in this
// file.
AudioProcessingImpl::ApmRenderState::ApmRenderState() = default;

// Defaulted destructor of the render state, defined out of line in this
// file.
AudioProcessingImpl::ApmRenderState::~ApmRenderState() = default;
2532
// Constructs the reporter, passing 1 to the `stats_message_queue_`
// constructor.
// NOTE(review): presumably this is the queue size, i.e. at most one pending
// stats message - confirm against the queue type.
AudioProcessingImpl::ApmStatsReporter::ApmStatsReporter()
    : stats_message_queue_(1) {}

// Defaulted destructor, defined out of line in this file.
AudioProcessingImpl::ApmStatsReporter::~ApmStatsReporter() = default;
2537
GetStatistics()2538 AudioProcessingStats AudioProcessingImpl::ApmStatsReporter::GetStatistics() {
2539 MutexLock lock_stats(&mutex_stats_);
2540 bool new_stats_available = stats_message_queue_.Remove(&cached_stats_);
2541 // If the message queue is full, return the cached stats.
2542 static_cast<void>(new_stats_available);
2543
2544 return cached_stats_;
2545 }
2546
UpdateStatistics(const AudioProcessingStats & new_stats)2547 void AudioProcessingImpl::ApmStatsReporter::UpdateStatistics(
2548 const AudioProcessingStats& new_stats) {
2549 AudioProcessingStats stats_to_queue = new_stats;
2550 bool stats_message_passed = stats_message_queue_.Insert(&stats_to_queue);
2551 // If the message queue is full, discard the new stats.
2552 static_cast<void>(stats_message_passed);
2553 }
2554
2555 } // namespace webrtc
2556