• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/audio_processing_impl.h"
12 
13 #include <algorithm>
14 #include <cstdint>
15 #include <cstring>
16 #include <memory>
17 #include <string>
18 #include <type_traits>
19 #include <utility>
20 
21 #include "absl/strings/match.h"
22 #include "absl/strings/string_view.h"
23 #include "absl/types/optional.h"
24 #include "api/array_view.h"
25 #include "api/audio/audio_frame.h"
26 #include "common_audio/audio_converter.h"
27 #include "common_audio/include/audio_util.h"
28 #include "modules/audio_processing/aec_dump/aec_dump_factory.h"
29 #include "modules/audio_processing/audio_buffer.h"
30 #include "modules/audio_processing/include/audio_frame_view.h"
31 #include "modules/audio_processing/logging/apm_data_dumper.h"
32 #include "modules/audio_processing/optionally_built_submodule_creators.h"
33 #include "rtc_base/checks.h"
34 #include "rtc_base/experiments/field_trial_parser.h"
35 #include "rtc_base/logging.h"
36 #include "rtc_base/time_utils.h"
37 #include "rtc_base/trace_event.h"
38 #include "system_wrappers/include/denormal_disabler.h"
39 #include "system_wrappers/include/field_trial.h"
40 #include "system_wrappers/include/metrics.h"
41 
// Evaluates `expr` exactly once and, when the result differs from `kNoError`,
// returns that result from the enclosing function. `kNoError` must be visible
// at the expansion site.
//
// The temporary uses a macro-private name: with a plain `err`, an `expr` that
// itself mentions a caller variable named `err` would be shadowed and the
// temporary would be initialized from itself.
#define RETURN_ON_ERR(expr)                 \
  do {                                      \
    const int return_on_err_code_ = (expr); \
    if (return_on_err_code_ != kNoError) {  \
      return return_on_err_code_;           \
    }                                       \
  } while (0)
49 
50 namespace webrtc {
51 
52 namespace {
53 
SampleRateSupportsMultiBand(int sample_rate_hz)54 bool SampleRateSupportsMultiBand(int sample_rate_hz) {
55   return sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
56          sample_rate_hz == AudioProcessing::kSampleRate48kHz;
57 }
58 
59 // Checks whether the high-pass filter should be done in the full-band.
EnforceSplitBandHpf()60 bool EnforceSplitBandHpf() {
61   return field_trial::IsEnabled("WebRTC-FullBandHpfKillSwitch");
62 }
63 
64 // Checks whether AEC3 should be allowed to decide what the default
65 // configuration should be based on the render and capture channel configuration
66 // at hand.
UseSetupSpecificDefaultAec3Congfig()67 bool UseSetupSpecificDefaultAec3Congfig() {
68   return !field_trial::IsEnabled(
69       "WebRTC-Aec3SetupSpecificDefaultConfigDefaultsKillSwitch");
70 }
71 
72 // If the "WebRTC-Audio-TransientSuppressorVadMode" field trial is unspecified,
73 // returns `TransientSuppressor::VadMode::kDefault`, otherwise parses the field
74 // trial and returns the specified mode:
75 // - WebRTC-Audio-TransientSuppressorVadMode/Enabled-Default returns `kDefault`;
76 // - WebRTC-Audio-TransientSuppressorVadMode/Enabled-RnnVad returns `kRnnVad`;
77 // - WebRTC-Audio-TransientSuppressorVadMode/Enabled-NoVad returns `kNoVad`.
GetTransientSuppressorVadMode()78 TransientSuppressor::VadMode GetTransientSuppressorVadMode() {
79   constexpr char kFieldTrial[] = "WebRTC-Audio-TransientSuppressorVadMode";
80   std::string full_name = webrtc::field_trial::FindFullName(kFieldTrial);
81   if (full_name.empty() || absl::EndsWith(full_name, "-Default")) {
82     return TransientSuppressor::VadMode::kDefault;
83   }
84   if (absl::EndsWith(full_name, "-RnnVad")) {
85     return TransientSuppressor::VadMode::kRnnVad;
86   }
87   if (absl::EndsWith(full_name, "-NoVad")) {
88     return TransientSuppressor::VadMode::kNoVad;
89   }
90   // Fallback to default.
91   RTC_LOG(LS_WARNING) << "Invalid parameter for " << kFieldTrial;
92   return TransientSuppressor::VadMode::kDefault;
93 }
94 
95 // Identify the native processing rate that best handles a sample rate.
SuitableProcessRate(int minimum_rate,int max_splitting_rate,bool band_splitting_required)96 int SuitableProcessRate(int minimum_rate,
97                         int max_splitting_rate,
98                         bool band_splitting_required) {
99   const int uppermost_native_rate =
100       band_splitting_required ? max_splitting_rate : 48000;
101   for (auto rate : {16000, 32000, 48000}) {
102     if (rate >= uppermost_native_rate) {
103       return uppermost_native_rate;
104     }
105     if (rate >= minimum_rate) {
106       return rate;
107     }
108   }
109   RTC_DCHECK_NOTREACHED();
110   return uppermost_native_rate;
111 }
112 
Agc1ConfigModeToInterfaceMode(AudioProcessing::Config::GainController1::Mode mode)113 GainControl::Mode Agc1ConfigModeToInterfaceMode(
114     AudioProcessing::Config::GainController1::Mode mode) {
115   using Agc1Config = AudioProcessing::Config::GainController1;
116   switch (mode) {
117     case Agc1Config::kAdaptiveAnalog:
118       return GainControl::kAdaptiveAnalog;
119     case Agc1Config::kAdaptiveDigital:
120       return GainControl::kAdaptiveDigital;
121     case Agc1Config::kFixedDigital:
122       return GainControl::kFixedDigital;
123   }
124   RTC_CHECK_NOTREACHED();
125 }
126 
MinimizeProcessingForUnusedOutput()127 bool MinimizeProcessingForUnusedOutput() {
128   return !field_trial::IsEnabled("WebRTC-MutedStateKillSwitch");
129 }
130 
// Upper bounds on the length of a frame of samples passed from the render side
// to the capture side (does not apply to AEC3).
constexpr size_t kMaxAllowedValuesOfSamplesPerBand = 160;
constexpr size_t kMaxAllowedValuesOfSamplesPerFrame = 480;

// Maximum number of frames to buffer in the render queue.
// TODO(peah): Decrease this once we properly handle hugely unbalanced
// reverse and forward call numbers.
constexpr size_t kMaxNumFramesToBuffer = 100;
140 
PackRenderAudioBufferForEchoDetector(const AudioBuffer & audio,std::vector<float> & packed_buffer)141 void PackRenderAudioBufferForEchoDetector(const AudioBuffer& audio,
142                                           std::vector<float>& packed_buffer) {
143   packed_buffer.clear();
144   packed_buffer.insert(packed_buffer.end(), audio.channels_const()[0],
145                        audio.channels_const()[0] + audio.num_frames());
146 }
147 
// Ways of populating the output when the audio formats cannot be processed.
enum class FormatErrorOutputOption {
  // Copy the input to the output unchanged.
  kOutputExactCopyOfInput,
  // Write the first input channel to every output channel.
  kOutputBroadcastCopyOfFirstInputChannel,
  // Fill the output with zeros.
  kOutputSilence,
  // Leave the output untouched.
  kDoNothing,
};
155 
// Classification of a stream format with respect to what APM can handle.
enum class AudioFormatValidity {
  // Format is supported by APM.
  kValidAndSupported,
  // Format has a reasonable interpretation but is not supported.
  kValidButUnsupportedSampleRate,
  // The remaining values signal that the audio does not have a reasonable
  // interpretation and cannot be used.
  kInvalidSampleRate,
  kInvalidChannelCount,
};
166 
ValidateAudioFormat(const StreamConfig & config)167 AudioFormatValidity ValidateAudioFormat(const StreamConfig& config) {
168   if (config.sample_rate_hz() < 0)
169     return AudioFormatValidity::kInvalidSampleRate;
170   if (config.num_channels() == 0)
171     return AudioFormatValidity::kInvalidChannelCount;
172 
173   // Format has a reasonable interpretation, but may still be unsupported.
174   if (config.sample_rate_hz() < 8000 ||
175       config.sample_rate_hz() > AudioBuffer::kMaxSampleRate)
176     return AudioFormatValidity::kValidButUnsupportedSampleRate;
177 
178   // Format is fully supported.
179   return AudioFormatValidity::kValidAndSupported;
180 }
181 
AudioFormatValidityToErrorCode(AudioFormatValidity validity)182 int AudioFormatValidityToErrorCode(AudioFormatValidity validity) {
183   switch (validity) {
184     case AudioFormatValidity::kValidAndSupported:
185       return AudioProcessing::kNoError;
186     case AudioFormatValidity::kValidButUnsupportedSampleRate:  // fall-through
187     case AudioFormatValidity::kInvalidSampleRate:
188       return AudioProcessing::kBadSampleRateError;
189     case AudioFormatValidity::kInvalidChannelCount:
190       return AudioProcessing::kBadNumberChannelsError;
191   }
192   RTC_DCHECK(false);
193 }
194 
195 // Returns an AudioProcessing::Error together with the best possible option for
196 // output audio content.
ChooseErrorOutputOption(const StreamConfig & input_config,const StreamConfig & output_config)197 std::pair<int, FormatErrorOutputOption> ChooseErrorOutputOption(
198     const StreamConfig& input_config,
199     const StreamConfig& output_config) {
200   AudioFormatValidity input_validity = ValidateAudioFormat(input_config);
201   AudioFormatValidity output_validity = ValidateAudioFormat(output_config);
202 
203   if (input_validity == AudioFormatValidity::kValidAndSupported &&
204       output_validity == AudioFormatValidity::kValidAndSupported &&
205       (output_config.num_channels() == 1 ||
206        output_config.num_channels() == input_config.num_channels())) {
207     return {AudioProcessing::kNoError, FormatErrorOutputOption::kDoNothing};
208   }
209 
210   int error_code = AudioFormatValidityToErrorCode(input_validity);
211   if (error_code == AudioProcessing::kNoError) {
212     error_code = AudioFormatValidityToErrorCode(output_validity);
213   }
214   if (error_code == AudioProcessing::kNoError) {
215     // The individual formats are valid but there is some error - must be
216     // channel mismatch.
217     error_code = AudioProcessing::kBadNumberChannelsError;
218   }
219 
220   FormatErrorOutputOption output_option;
221   if (output_validity != AudioFormatValidity::kValidAndSupported &&
222       output_validity != AudioFormatValidity::kValidButUnsupportedSampleRate) {
223     // The output format is uninterpretable: cannot do anything.
224     output_option = FormatErrorOutputOption::kDoNothing;
225   } else if (input_validity != AudioFormatValidity::kValidAndSupported &&
226              input_validity !=
227                  AudioFormatValidity::kValidButUnsupportedSampleRate) {
228     // The input format is uninterpretable: cannot use it, must output silence.
229     output_option = FormatErrorOutputOption::kOutputSilence;
230   } else if (input_config.sample_rate_hz() != output_config.sample_rate_hz()) {
231     // Sample rates do not match: Cannot copy input into output, output silence.
232     // Note: If the sample rates are in a supported range, we could resample.
233     // However, that would significantly increase complexity of this error
234     // handling code.
235     output_option = FormatErrorOutputOption::kOutputSilence;
236   } else if (input_config.num_channels() != output_config.num_channels()) {
237     // Channel counts do not match: We cannot easily map input channels to
238     // output channels.
239     output_option =
240         FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel;
241   } else {
242     // The formats match exactly.
243     RTC_DCHECK(input_config == output_config);
244     output_option = FormatErrorOutputOption::kOutputExactCopyOfInput;
245   }
246   return std::make_pair(error_code, output_option);
247 }
248 
249 // Checks if the audio format is supported. If not, the output is populated in a
250 // best-effort manner and an APM error code is returned.
HandleUnsupportedAudioFormats(const int16_t * const src,const StreamConfig & input_config,const StreamConfig & output_config,int16_t * const dest)251 int HandleUnsupportedAudioFormats(const int16_t* const src,
252                                   const StreamConfig& input_config,
253                                   const StreamConfig& output_config,
254                                   int16_t* const dest) {
255   RTC_DCHECK(src);
256   RTC_DCHECK(dest);
257 
258   auto [error_code, output_option] =
259       ChooseErrorOutputOption(input_config, output_config);
260   if (error_code == AudioProcessing::kNoError)
261     return AudioProcessing::kNoError;
262 
263   const size_t num_output_channels = output_config.num_channels();
264   switch (output_option) {
265     case FormatErrorOutputOption::kOutputSilence:
266       memset(dest, 0, output_config.num_samples() * sizeof(int16_t));
267       break;
268     case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel:
269       for (size_t i = 0; i < output_config.num_frames(); ++i) {
270         int16_t sample = src[input_config.num_channels() * i];
271         for (size_t ch = 0; ch < num_output_channels; ++ch) {
272           dest[ch + num_output_channels * i] = sample;
273         }
274       }
275       break;
276     case FormatErrorOutputOption::kOutputExactCopyOfInput:
277       memcpy(dest, src, output_config.num_samples() * sizeof(int16_t));
278       break;
279     case FormatErrorOutputOption::kDoNothing:
280       break;
281   }
282   return error_code;
283 }
284 
285 // Checks if the audio format is supported. If not, the output is populated in a
286 // best-effort manner and an APM error code is returned.
HandleUnsupportedAudioFormats(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config,float * const * dest)287 int HandleUnsupportedAudioFormats(const float* const* src,
288                                   const StreamConfig& input_config,
289                                   const StreamConfig& output_config,
290                                   float* const* dest) {
291   RTC_DCHECK(src);
292   RTC_DCHECK(dest);
293   for (size_t i = 0; i < input_config.num_channels(); ++i) {
294     RTC_DCHECK(src[i]);
295   }
296   for (size_t i = 0; i < output_config.num_channels(); ++i) {
297     RTC_DCHECK(dest[i]);
298   }
299 
300   auto [error_code, output_option] =
301       ChooseErrorOutputOption(input_config, output_config);
302   if (error_code == AudioProcessing::kNoError)
303     return AudioProcessing::kNoError;
304 
305   const size_t num_output_channels = output_config.num_channels();
306   switch (output_option) {
307     case FormatErrorOutputOption::kOutputSilence:
308       for (size_t ch = 0; ch < num_output_channels; ++ch) {
309         memset(dest[ch], 0, output_config.num_frames() * sizeof(float));
310       }
311       break;
312     case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel:
313       for (size_t ch = 0; ch < num_output_channels; ++ch) {
314         memcpy(dest[ch], src[0], output_config.num_frames() * sizeof(float));
315       }
316       break;
317     case FormatErrorOutputOption::kOutputExactCopyOfInput:
318       for (size_t ch = 0; ch < num_output_channels; ++ch) {
319         memcpy(dest[ch], src[ch], output_config.num_frames() * sizeof(float));
320       }
321       break;
322     case FormatErrorOutputOption::kDoNothing:
323       break;
324   }
325   return error_code;
326 }
327 
328 const absl::optional<InputVolumeController::Config>
GetInputVolumeControllerConfigOverride()329 GetInputVolumeControllerConfigOverride() {
330   constexpr char kInputVolumeControllerFieldTrial[] =
331       "WebRTC-Audio-InputVolumeControllerExperiment";
332 
333   if (!field_trial::IsEnabled(kInputVolumeControllerFieldTrial)) {
334     return absl::nullopt;
335   }
336 
337   constexpr InputVolumeController::Config kDefaultConfig;
338 
339   FieldTrialFlag enabled("Enabled", false);
340   FieldTrialConstrained<int> clipped_level_min(
341       "clipped_level_min", kDefaultConfig.clipped_level_min, 0, 255);
342   FieldTrialConstrained<int> clipped_level_step(
343       "clipped_level_step", kDefaultConfig.clipped_level_step, 0, 255);
344   FieldTrialConstrained<double> clipped_ratio_threshold(
345       "clipped_ratio_threshold", kDefaultConfig.clipped_ratio_threshold, 0, 1);
346   FieldTrialConstrained<int> clipped_wait_frames(
347       "clipped_wait_frames", kDefaultConfig.clipped_wait_frames, 0,
348       absl::nullopt);
349   FieldTrialParameter<bool> enable_clipping_predictor(
350       "enable_clipping_predictor", kDefaultConfig.enable_clipping_predictor);
351   FieldTrialConstrained<int> target_range_max_dbfs(
352       "target_range_max_dbfs", kDefaultConfig.target_range_max_dbfs, -90, 30);
353   FieldTrialConstrained<int> target_range_min_dbfs(
354       "target_range_min_dbfs", kDefaultConfig.target_range_min_dbfs, -90, 30);
355   FieldTrialConstrained<int> update_input_volume_wait_frames(
356       "update_input_volume_wait_frames",
357       kDefaultConfig.update_input_volume_wait_frames, 0, absl::nullopt);
358   FieldTrialConstrained<double> speech_probability_threshold(
359       "speech_probability_threshold",
360       kDefaultConfig.speech_probability_threshold, 0, 1);
361   FieldTrialConstrained<double> speech_ratio_threshold(
362       "speech_ratio_threshold", kDefaultConfig.speech_ratio_threshold, 0, 1);
363 
364   // Field-trial based override for the input volume controller config.
365   const std::string field_trial_name =
366       field_trial::FindFullName(kInputVolumeControllerFieldTrial);
367 
368   ParseFieldTrial({&enabled, &clipped_level_min, &clipped_level_step,
369                    &clipped_ratio_threshold, &clipped_wait_frames,
370                    &enable_clipping_predictor, &target_range_max_dbfs,
371                    &target_range_min_dbfs, &update_input_volume_wait_frames,
372                    &speech_probability_threshold, &speech_ratio_threshold},
373                   field_trial_name);
374 
375   // Checked already by `IsEnabled()` before parsing, therefore always true.
376   RTC_DCHECK(enabled);
377 
378   return InputVolumeController::Config{
379       .clipped_level_min = static_cast<int>(clipped_level_min.Get()),
380       .clipped_level_step = static_cast<int>(clipped_level_step.Get()),
381       .clipped_ratio_threshold =
382           static_cast<float>(clipped_ratio_threshold.Get()),
383       .clipped_wait_frames = static_cast<int>(clipped_wait_frames.Get()),
384       .enable_clipping_predictor =
385           static_cast<bool>(enable_clipping_predictor.Get()),
386       .target_range_max_dbfs = static_cast<int>(target_range_max_dbfs.Get()),
387       .target_range_min_dbfs = static_cast<int>(target_range_min_dbfs.Get()),
388       .update_input_volume_wait_frames =
389           static_cast<int>(update_input_volume_wait_frames.Get()),
390       .speech_probability_threshold =
391           static_cast<float>(speech_probability_threshold.Get()),
392       .speech_ratio_threshold =
393           static_cast<float>(speech_ratio_threshold.Get()),
394   };
395 }
396 
397 // Switches all gain control to AGC2 if experimenting with input volume
398 // controller.
AdjustConfig(const AudioProcessing::Config & config,const absl::optional<InputVolumeController::Config> & input_volume_controller_config_override)399 const AudioProcessing::Config AdjustConfig(
400     const AudioProcessing::Config& config,
401     const absl::optional<InputVolumeController::Config>&
402         input_volume_controller_config_override) {
403   const bool analog_agc_enabled =
404       config.gain_controller1.enabled &&
405       (config.gain_controller1.mode ==
406            AudioProcessing::Config::GainController1::kAdaptiveAnalog ||
407        config.gain_controller1.analog_gain_controller.enabled);
408 
409   // Do not update the config if none of the analog AGCs is active
410   // regardless of the input volume controller override.
411   if (!analog_agc_enabled ||
412       !input_volume_controller_config_override.has_value()) {
413     return config;
414   }
415 
416   const bool hybrid_agc_config_detected =
417       config.gain_controller1.enabled &&
418       config.gain_controller1.analog_gain_controller.enabled &&
419       !config.gain_controller1.analog_gain_controller.enable_digital_adaptive &&
420       config.gain_controller2.enabled &&
421       config.gain_controller2.adaptive_digital.enabled;
422 
423   const bool full_agc1_config_detected =
424       config.gain_controller1.enabled &&
425       config.gain_controller1.analog_gain_controller.enabled &&
426       config.gain_controller1.analog_gain_controller.enable_digital_adaptive &&
427       !config.gain_controller2.enabled;
428 
429   if (hybrid_agc_config_detected == full_agc1_config_detected ||
430       config.gain_controller2.input_volume_controller.enabled) {
431     RTC_LOG(LS_ERROR) << "Unexpected AGC config: Config not adjusted.";
432     return config;
433   }
434 
435   AudioProcessing::Config adjusted_config = config;
436   adjusted_config.gain_controller1.enabled = false;
437   adjusted_config.gain_controller1.analog_gain_controller.enabled = false;
438   adjusted_config.gain_controller2.enabled = true;
439   adjusted_config.gain_controller2.adaptive_digital.enabled = true;
440   adjusted_config.gain_controller2.input_volume_controller.enabled = true;
441 
442   return adjusted_config;
443 }
444 
445 using DownmixMethod = AudioProcessing::Config::Pipeline::DownmixMethod;
446 
SetDownmixMethod(AudioBuffer & buffer,DownmixMethod method)447 void SetDownmixMethod(AudioBuffer& buffer, DownmixMethod method) {
448   switch (method) {
449     case DownmixMethod::kAverageChannels:
450       buffer.set_downmixing_by_averaging();
451       break;
452     case DownmixMethod::kUseFirstChannel:
453       buffer.set_downmixing_to_specific_channel(/*channel=*/0);
454       break;
455   }
456 }
457 
// NOTE(review): presumably the placeholder written to data dumps when no input
// volume has been specified — confirm at the use sites.
constexpr int kUnspecifiedDataDumpInputVolume{-100};
459 
460 }  // namespace
461 
462 // Throughout webrtc, it's assumed that success is represented by zero.
463 static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero");
464 
SubmoduleStates(bool capture_post_processor_enabled,bool render_pre_processor_enabled,bool capture_analyzer_enabled)465 AudioProcessingImpl::SubmoduleStates::SubmoduleStates(
466     bool capture_post_processor_enabled,
467     bool render_pre_processor_enabled,
468     bool capture_analyzer_enabled)
469     : capture_post_processor_enabled_(capture_post_processor_enabled),
470       render_pre_processor_enabled_(render_pre_processor_enabled),
471       capture_analyzer_enabled_(capture_analyzer_enabled) {}
472 
Update(bool high_pass_filter_enabled,bool mobile_echo_controller_enabled,bool noise_suppressor_enabled,bool adaptive_gain_controller_enabled,bool gain_controller2_enabled,bool voice_activity_detector_enabled,bool gain_adjustment_enabled,bool echo_controller_enabled,bool transient_suppressor_enabled)473 bool AudioProcessingImpl::SubmoduleStates::Update(
474     bool high_pass_filter_enabled,
475     bool mobile_echo_controller_enabled,
476     bool noise_suppressor_enabled,
477     bool adaptive_gain_controller_enabled,
478     bool gain_controller2_enabled,
479     bool voice_activity_detector_enabled,
480     bool gain_adjustment_enabled,
481     bool echo_controller_enabled,
482     bool transient_suppressor_enabled) {
483   bool changed = false;
484   changed |= (high_pass_filter_enabled != high_pass_filter_enabled_);
485   changed |=
486       (mobile_echo_controller_enabled != mobile_echo_controller_enabled_);
487   changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
488   changed |=
489       (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
490   changed |= (gain_controller2_enabled != gain_controller2_enabled_);
491   changed |=
492       (voice_activity_detector_enabled != voice_activity_detector_enabled_);
493   changed |= (gain_adjustment_enabled != gain_adjustment_enabled_);
494   changed |= (echo_controller_enabled != echo_controller_enabled_);
495   changed |= (transient_suppressor_enabled != transient_suppressor_enabled_);
496   if (changed) {
497     high_pass_filter_enabled_ = high_pass_filter_enabled;
498     mobile_echo_controller_enabled_ = mobile_echo_controller_enabled;
499     noise_suppressor_enabled_ = noise_suppressor_enabled;
500     adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
501     gain_controller2_enabled_ = gain_controller2_enabled;
502     voice_activity_detector_enabled_ = voice_activity_detector_enabled;
503     gain_adjustment_enabled_ = gain_adjustment_enabled;
504     echo_controller_enabled_ = echo_controller_enabled;
505     transient_suppressor_enabled_ = transient_suppressor_enabled;
506   }
507 
508   changed |= first_update_;
509   first_update_ = false;
510   return changed;
511 }
512 
CaptureMultiBandSubModulesActive() const513 bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandSubModulesActive()
514     const {
515   return CaptureMultiBandProcessingPresent();
516 }
517 
CaptureMultiBandProcessingPresent() const518 bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingPresent()
519     const {
520   // If echo controller is present, assume it performs active processing.
521   return CaptureMultiBandProcessingActive(/*ec_processing_active=*/true);
522 }
523 
CaptureMultiBandProcessingActive(bool ec_processing_active) const524 bool AudioProcessingImpl::SubmoduleStates::CaptureMultiBandProcessingActive(
525     bool ec_processing_active) const {
526   return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
527          noise_suppressor_enabled_ || adaptive_gain_controller_enabled_ ||
528          (echo_controller_enabled_ && ec_processing_active);
529 }
530 
CaptureFullBandProcessingActive() const531 bool AudioProcessingImpl::SubmoduleStates::CaptureFullBandProcessingActive()
532     const {
533   return gain_controller2_enabled_ || capture_post_processor_enabled_ ||
534          gain_adjustment_enabled_;
535 }
536 
CaptureAnalyzerActive() const537 bool AudioProcessingImpl::SubmoduleStates::CaptureAnalyzerActive() const {
538   return capture_analyzer_enabled_;
539 }
540 
RenderMultiBandSubModulesActive() const541 bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandSubModulesActive()
542     const {
543   return RenderMultiBandProcessingActive() || mobile_echo_controller_enabled_ ||
544          adaptive_gain_controller_enabled_ || echo_controller_enabled_;
545 }
546 
RenderFullBandProcessingActive() const547 bool AudioProcessingImpl::SubmoduleStates::RenderFullBandProcessingActive()
548     const {
549   return render_pre_processor_enabled_;
550 }
551 
RenderMultiBandProcessingActive() const552 bool AudioProcessingImpl::SubmoduleStates::RenderMultiBandProcessingActive()
553     const {
554   return false;
555 }
556 
HighPassFilteringRequired() const557 bool AudioProcessingImpl::SubmoduleStates::HighPassFilteringRequired() const {
558   return high_pass_filter_enabled_ || mobile_echo_controller_enabled_ ||
559          noise_suppressor_enabled_;
560 }
561 
AudioProcessingImpl()562 AudioProcessingImpl::AudioProcessingImpl()
563     : AudioProcessingImpl(/*config=*/{},
564                           /*capture_post_processor=*/nullptr,
565                           /*render_pre_processor=*/nullptr,
566                           /*echo_control_factory=*/nullptr,
567                           /*echo_detector=*/nullptr,
568                           /*capture_analyzer=*/nullptr) {}
569 
570 std::atomic<int> AudioProcessingImpl::instance_count_(0);
571 
AudioProcessingImpl(const AudioProcessing::Config & config,std::unique_ptr<CustomProcessing> capture_post_processor,std::unique_ptr<CustomProcessing> render_pre_processor,std::unique_ptr<EchoControlFactory> echo_control_factory,rtc::scoped_refptr<EchoDetector> echo_detector,std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)572 AudioProcessingImpl::AudioProcessingImpl(
573     const AudioProcessing::Config& config,
574     std::unique_ptr<CustomProcessing> capture_post_processor,
575     std::unique_ptr<CustomProcessing> render_pre_processor,
576     std::unique_ptr<EchoControlFactory> echo_control_factory,
577     rtc::scoped_refptr<EchoDetector> echo_detector,
578     std::unique_ptr<CustomAudioAnalyzer> capture_analyzer)
579     : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
580       use_setup_specific_default_aec3_config_(
581           UseSetupSpecificDefaultAec3Congfig()),
582       input_volume_controller_config_override_(
583           GetInputVolumeControllerConfigOverride()),
584       use_denormal_disabler_(
585           !field_trial::IsEnabled("WebRTC-ApmDenormalDisablerKillSwitch")),
586       transient_suppressor_vad_mode_(GetTransientSuppressorVadMode()),
587       capture_runtime_settings_(RuntimeSettingQueueSize()),
588       render_runtime_settings_(RuntimeSettingQueueSize()),
589       capture_runtime_settings_enqueuer_(&capture_runtime_settings_),
590       render_runtime_settings_enqueuer_(&render_runtime_settings_),
591       echo_control_factory_(std::move(echo_control_factory)),
592       config_(AdjustConfig(config, input_volume_controller_config_override_)),
593       submodule_states_(!!capture_post_processor,
594                         !!render_pre_processor,
595                         !!capture_analyzer),
596       submodules_(std::move(capture_post_processor),
597                   std::move(render_pre_processor),
598                   std::move(echo_detector),
599                   std::move(capture_analyzer)),
600       constants_(!field_trial::IsEnabled(
601                      "WebRTC-ApmExperimentalMultiChannelRenderKillSwitch"),
602                  !field_trial::IsEnabled(
603                      "WebRTC-ApmExperimentalMultiChannelCaptureKillSwitch"),
604                  EnforceSplitBandHpf(),
605                  MinimizeProcessingForUnusedOutput(),
606                  field_trial::IsEnabled("WebRTC-TransientSuppressorForcedOff")),
607       capture_(),
608       capture_nonlocked_(),
609       applied_input_volume_stats_reporter_(
610           InputVolumeStatsReporter::InputVolumeType::kApplied),
611       recommended_input_volume_stats_reporter_(
612           InputVolumeStatsReporter::InputVolumeType::kRecommended) {
613   RTC_LOG(LS_INFO) << "Injected APM submodules:"
614                       "\nEcho control factory: "
615                    << !!echo_control_factory_
616                    << "\nEcho detector: " << !!submodules_.echo_detector
617                    << "\nCapture analyzer: " << !!submodules_.capture_analyzer
618                    << "\nCapture post processor: "
619                    << !!submodules_.capture_post_processor
620                    << "\nRender pre processor: "
621                    << !!submodules_.render_pre_processor;
622   if (!DenormalDisabler::IsSupported()) {
623     RTC_LOG(LS_INFO) << "Denormal disabler unsupported";
624   }
625 
626   RTC_LOG(LS_INFO) << "AudioProcessing: " << config_.ToString();
627 
628   // Mark Echo Controller enabled if a factory is injected.
629   capture_nonlocked_.echo_controller_enabled =
630       static_cast<bool>(echo_control_factory_);
631 
632   Initialize();
633 }
634 
635 AudioProcessingImpl::~AudioProcessingImpl() = default;
636 
Initialize()637 int AudioProcessingImpl::Initialize() {
638   // Run in a single-threaded manner during initialization.
639   MutexLock lock_render(&mutex_render_);
640   MutexLock lock_capture(&mutex_capture_);
641   InitializeLocked();
642   return kNoError;
643 }
644 
Initialize(const ProcessingConfig & processing_config)645 int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
646   // Run in a single-threaded manner during initialization.
647   MutexLock lock_render(&mutex_render_);
648   MutexLock lock_capture(&mutex_capture_);
649   InitializeLocked(processing_config);
650   return kNoError;
651 }
652 
MaybeInitializeRender(const StreamConfig & input_config,const StreamConfig & output_config)653 void AudioProcessingImpl::MaybeInitializeRender(
654     const StreamConfig& input_config,
655     const StreamConfig& output_config) {
656   ProcessingConfig processing_config = formats_.api_format;
657   processing_config.reverse_input_stream() = input_config;
658   processing_config.reverse_output_stream() = output_config;
659 
660   if (processing_config == formats_.api_format) {
661     return;
662   }
663 
664   MutexLock lock_capture(&mutex_capture_);
665   InitializeLocked(processing_config);
666 }
667 
InitializeLocked()668 void AudioProcessingImpl::InitializeLocked() {
669   UpdateActiveSubmoduleStates();
670 
671   const int render_audiobuffer_sample_rate_hz =
672       formats_.api_format.reverse_output_stream().num_frames() == 0
673           ? formats_.render_processing_format.sample_rate_hz()
674           : formats_.api_format.reverse_output_stream().sample_rate_hz();
675   if (formats_.api_format.reverse_input_stream().num_channels() > 0) {
676     render_.render_audio.reset(new AudioBuffer(
677         formats_.api_format.reverse_input_stream().sample_rate_hz(),
678         formats_.api_format.reverse_input_stream().num_channels(),
679         formats_.render_processing_format.sample_rate_hz(),
680         formats_.render_processing_format.num_channels(),
681         render_audiobuffer_sample_rate_hz,
682         formats_.render_processing_format.num_channels()));
683     if (formats_.api_format.reverse_input_stream() !=
684         formats_.api_format.reverse_output_stream()) {
685       render_.render_converter = AudioConverter::Create(
686           formats_.api_format.reverse_input_stream().num_channels(),
687           formats_.api_format.reverse_input_stream().num_frames(),
688           formats_.api_format.reverse_output_stream().num_channels(),
689           formats_.api_format.reverse_output_stream().num_frames());
690     } else {
691       render_.render_converter.reset(nullptr);
692     }
693   } else {
694     render_.render_audio.reset(nullptr);
695     render_.render_converter.reset(nullptr);
696   }
697 
698   capture_.capture_audio.reset(new AudioBuffer(
699       formats_.api_format.input_stream().sample_rate_hz(),
700       formats_.api_format.input_stream().num_channels(),
701       capture_nonlocked_.capture_processing_format.sample_rate_hz(),
702       formats_.api_format.output_stream().num_channels(),
703       formats_.api_format.output_stream().sample_rate_hz(),
704       formats_.api_format.output_stream().num_channels()));
705   SetDownmixMethod(*capture_.capture_audio,
706                    config_.pipeline.capture_downmix_method);
707 
708   if (capture_nonlocked_.capture_processing_format.sample_rate_hz() <
709           formats_.api_format.output_stream().sample_rate_hz() &&
710       formats_.api_format.output_stream().sample_rate_hz() == 48000) {
711     capture_.capture_fullband_audio.reset(
712         new AudioBuffer(formats_.api_format.input_stream().sample_rate_hz(),
713                         formats_.api_format.input_stream().num_channels(),
714                         formats_.api_format.output_stream().sample_rate_hz(),
715                         formats_.api_format.output_stream().num_channels(),
716                         formats_.api_format.output_stream().sample_rate_hz(),
717                         formats_.api_format.output_stream().num_channels()));
718     SetDownmixMethod(*capture_.capture_fullband_audio,
719                      config_.pipeline.capture_downmix_method);
720   } else {
721     capture_.capture_fullband_audio.reset();
722   }
723 
724   AllocateRenderQueue();
725 
726   InitializeGainController1();
727   InitializeTransientSuppressor();
728   InitializeHighPassFilter(true);
729   InitializeResidualEchoDetector();
730   InitializeEchoController();
731   InitializeGainController2(/*config_has_changed=*/true);
732   InitializeVoiceActivityDetector(/*config_has_changed=*/true);
733   InitializeNoiseSuppressor();
734   InitializeAnalyzer();
735   InitializePostProcessor();
736   InitializePreProcessor();
737   InitializeCaptureLevelsAdjuster();
738 
739   if (aec_dump_) {
740     aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis());
741   }
742 }
743 
InitializeLocked(const ProcessingConfig & config)744 void AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
745   UpdateActiveSubmoduleStates();
746 
747   formats_.api_format = config;
748 
749   // Choose maximum rate to use for the split filtering.
750   RTC_DCHECK(config_.pipeline.maximum_internal_processing_rate == 48000 ||
751              config_.pipeline.maximum_internal_processing_rate == 32000);
752   int max_splitting_rate = 48000;
753   if (config_.pipeline.maximum_internal_processing_rate == 32000) {
754     max_splitting_rate = config_.pipeline.maximum_internal_processing_rate;
755   }
756 
757   int capture_processing_rate = SuitableProcessRate(
758       std::min(formats_.api_format.input_stream().sample_rate_hz(),
759                formats_.api_format.output_stream().sample_rate_hz()),
760       max_splitting_rate,
761       submodule_states_.CaptureMultiBandSubModulesActive() ||
762           submodule_states_.RenderMultiBandSubModulesActive());
763   RTC_DCHECK_NE(8000, capture_processing_rate);
764 
765   capture_nonlocked_.capture_processing_format =
766       StreamConfig(capture_processing_rate);
767 
768   int render_processing_rate;
769   if (!capture_nonlocked_.echo_controller_enabled) {
770     render_processing_rate = SuitableProcessRate(
771         std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(),
772                  formats_.api_format.reverse_output_stream().sample_rate_hz()),
773         max_splitting_rate,
774         submodule_states_.CaptureMultiBandSubModulesActive() ||
775             submodule_states_.RenderMultiBandSubModulesActive());
776   } else {
777     render_processing_rate = capture_processing_rate;
778   }
779 
780   // If the forward sample rate is 8 kHz, the render stream is also processed
781   // at this rate.
782   if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
783       kSampleRate8kHz) {
784     render_processing_rate = kSampleRate8kHz;
785   } else {
786     render_processing_rate =
787         std::max(render_processing_rate, static_cast<int>(kSampleRate16kHz));
788   }
789 
790   RTC_DCHECK_NE(8000, render_processing_rate);
791 
792   if (submodule_states_.RenderMultiBandSubModulesActive()) {
793     // By default, downmix the render stream to mono for analysis. This has been
794     // demonstrated to work well for AEC in most practical scenarios.
795     const bool multi_channel_render = config_.pipeline.multi_channel_render &&
796                                       constants_.multi_channel_render_support;
797     int render_processing_num_channels =
798         multi_channel_render
799             ? formats_.api_format.reverse_input_stream().num_channels()
800             : 1;
801     formats_.render_processing_format =
802         StreamConfig(render_processing_rate, render_processing_num_channels);
803   } else {
804     formats_.render_processing_format = StreamConfig(
805         formats_.api_format.reverse_input_stream().sample_rate_hz(),
806         formats_.api_format.reverse_input_stream().num_channels());
807   }
808 
809   if (capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
810           kSampleRate32kHz ||
811       capture_nonlocked_.capture_processing_format.sample_rate_hz() ==
812           kSampleRate48kHz) {
813     capture_nonlocked_.split_rate = kSampleRate16kHz;
814   } else {
815     capture_nonlocked_.split_rate =
816         capture_nonlocked_.capture_processing_format.sample_rate_hz();
817   }
818 
819   InitializeLocked();
820 }
821 
ApplyConfig(const AudioProcessing::Config & config)822 void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
823   // Run in a single-threaded manner when applying the settings.
824   MutexLock lock_render(&mutex_render_);
825   MutexLock lock_capture(&mutex_capture_);
826 
827   // TODO(bugs.webrtc.org/7494): Replace `adjusted_config` with `config` after
828   // "WebRTC-Audio-InputVolumeControllerExperiment" field trial is removed.
829   const auto adjusted_config =
830       AdjustConfig(config, input_volume_controller_config_override_);
831 
832   RTC_LOG(LS_INFO) << "AudioProcessing::ApplyConfig: "
833                    << adjusted_config.ToString();
834 
835   const bool pipeline_config_changed =
836       config_.pipeline.multi_channel_render !=
837           adjusted_config.pipeline.multi_channel_render ||
838       config_.pipeline.multi_channel_capture !=
839           adjusted_config.pipeline.multi_channel_capture ||
840       config_.pipeline.maximum_internal_processing_rate !=
841           adjusted_config.pipeline.maximum_internal_processing_rate ||
842       config_.pipeline.capture_downmix_method !=
843           adjusted_config.pipeline.capture_downmix_method;
844 
845   const bool aec_config_changed =
846       config_.echo_canceller.enabled !=
847           adjusted_config.echo_canceller.enabled ||
848       config_.echo_canceller.mobile_mode !=
849           adjusted_config.echo_canceller.mobile_mode;
850 
851   const bool agc1_config_changed =
852       config_.gain_controller1 != adjusted_config.gain_controller1;
853 
854   const bool agc2_config_changed =
855       config_.gain_controller2 != adjusted_config.gain_controller2;
856 
857   const bool ns_config_changed =
858       config_.noise_suppression.enabled !=
859           adjusted_config.noise_suppression.enabled ||
860       config_.noise_suppression.level !=
861           adjusted_config.noise_suppression.level;
862 
863   const bool ts_config_changed = config_.transient_suppression.enabled !=
864                                  adjusted_config.transient_suppression.enabled;
865 
866   const bool pre_amplifier_config_changed =
867       config_.pre_amplifier.enabled != adjusted_config.pre_amplifier.enabled ||
868       config_.pre_amplifier.fixed_gain_factor !=
869           adjusted_config.pre_amplifier.fixed_gain_factor;
870 
871   const bool gain_adjustment_config_changed =
872       config_.capture_level_adjustment !=
873       adjusted_config.capture_level_adjustment;
874 
875   config_ = adjusted_config;
876 
877   if (aec_config_changed) {
878     InitializeEchoController();
879   }
880 
881   if (ns_config_changed) {
882     InitializeNoiseSuppressor();
883   }
884 
885   if (ts_config_changed) {
886     InitializeTransientSuppressor();
887   }
888 
889   InitializeHighPassFilter(false);
890 
891   if (agc1_config_changed) {
892     InitializeGainController1();
893   }
894 
895   const bool config_ok = GainController2::Validate(config_.gain_controller2);
896   if (!config_ok) {
897     RTC_LOG(LS_ERROR)
898         << "Invalid Gain Controller 2 config; using the default config.";
899     config_.gain_controller2 = AudioProcessing::Config::GainController2();
900   }
901 
902   InitializeGainController2(agc2_config_changed);
903   InitializeVoiceActivityDetector(agc2_config_changed);
904 
905   if (pre_amplifier_config_changed || gain_adjustment_config_changed) {
906     InitializeCaptureLevelsAdjuster();
907   }
908 
909   // Reinitialization must happen after all submodule configuration to avoid
910   // additional reinitializations on the next capture / render processing call.
911   if (pipeline_config_changed) {
912     InitializeLocked(formats_.api_format);
913   }
914 }
915 
OverrideSubmoduleCreationForTesting(const ApmSubmoduleCreationOverrides & overrides)916 void AudioProcessingImpl::OverrideSubmoduleCreationForTesting(
917     const ApmSubmoduleCreationOverrides& overrides) {
918   MutexLock lock(&mutex_capture_);
919   submodule_creation_overrides_ = overrides;
920 }
921 
proc_sample_rate_hz() const922 int AudioProcessingImpl::proc_sample_rate_hz() const {
923   // Used as callback from submodules, hence locking is not allowed.
924   return capture_nonlocked_.capture_processing_format.sample_rate_hz();
925 }
926 
proc_fullband_sample_rate_hz() const927 int AudioProcessingImpl::proc_fullband_sample_rate_hz() const {
928   return capture_.capture_fullband_audio
929              ? capture_.capture_fullband_audio->num_frames() * 100
930              : capture_nonlocked_.capture_processing_format.sample_rate_hz();
931 }
932 
proc_split_sample_rate_hz() const933 int AudioProcessingImpl::proc_split_sample_rate_hz() const {
934   // Used as callback from submodules, hence locking is not allowed.
935   return capture_nonlocked_.split_rate;
936 }
937 
num_reverse_channels() const938 size_t AudioProcessingImpl::num_reverse_channels() const {
939   // Used as callback from submodules, hence locking is not allowed.
940   return formats_.render_processing_format.num_channels();
941 }
942 
num_input_channels() const943 size_t AudioProcessingImpl::num_input_channels() const {
944   // Used as callback from submodules, hence locking is not allowed.
945   return formats_.api_format.input_stream().num_channels();
946 }
947 
num_proc_channels() const948 size_t AudioProcessingImpl::num_proc_channels() const {
949   // Used as callback from submodules, hence locking is not allowed.
950   const bool multi_channel_capture = config_.pipeline.multi_channel_capture &&
951                                      constants_.multi_channel_capture_support;
952   if (capture_nonlocked_.echo_controller_enabled && !multi_channel_capture) {
953     return 1;
954   }
955   return num_output_channels();
956 }
957 
num_output_channels() const958 size_t AudioProcessingImpl::num_output_channels() const {
959   // Used as callback from submodules, hence locking is not allowed.
960   return formats_.api_format.output_stream().num_channels();
961 }
962 
set_output_will_be_muted(bool muted)963 void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
964   MutexLock lock(&mutex_capture_);
965   HandleCaptureOutputUsedSetting(!muted);
966 }
967 
HandleCaptureOutputUsedSetting(bool capture_output_used)968 void AudioProcessingImpl::HandleCaptureOutputUsedSetting(
969     bool capture_output_used) {
970   capture_.capture_output_used =
971       capture_output_used || !constants_.minimize_processing_for_unused_output;
972 
973   if (submodules_.agc_manager.get()) {
974     submodules_.agc_manager->HandleCaptureOutputUsedChange(
975         capture_.capture_output_used);
976   }
977   if (submodules_.echo_controller) {
978     submodules_.echo_controller->SetCaptureOutputUsage(
979         capture_.capture_output_used);
980   }
981   if (submodules_.noise_suppressor) {
982     submodules_.noise_suppressor->SetCaptureOutputUsage(
983         capture_.capture_output_used);
984   }
985   if (submodules_.gain_controller2) {
986     submodules_.gain_controller2->SetCaptureOutputUsed(
987         capture_.capture_output_used);
988   }
989 }
990 
SetRuntimeSetting(RuntimeSetting setting)991 void AudioProcessingImpl::SetRuntimeSetting(RuntimeSetting setting) {
992   PostRuntimeSetting(setting);
993 }
994 
PostRuntimeSetting(RuntimeSetting setting)995 bool AudioProcessingImpl::PostRuntimeSetting(RuntimeSetting setting) {
996   switch (setting.type()) {
997     case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
998     case RuntimeSetting::Type::kPlayoutAudioDeviceChange:
999       return render_runtime_settings_enqueuer_.Enqueue(setting);
1000     case RuntimeSetting::Type::kCapturePreGain:
1001     case RuntimeSetting::Type::kCapturePostGain:
1002     case RuntimeSetting::Type::kCaptureCompressionGain:
1003     case RuntimeSetting::Type::kCaptureFixedPostGain:
1004     case RuntimeSetting::Type::kCaptureOutputUsed:
1005       return capture_runtime_settings_enqueuer_.Enqueue(setting);
1006     case RuntimeSetting::Type::kPlayoutVolumeChange: {
1007       bool enqueueing_successful;
1008       enqueueing_successful =
1009           capture_runtime_settings_enqueuer_.Enqueue(setting);
1010       enqueueing_successful =
1011           render_runtime_settings_enqueuer_.Enqueue(setting) &&
1012           enqueueing_successful;
1013       return enqueueing_successful;
1014     }
1015     case RuntimeSetting::Type::kNotSpecified:
1016       RTC_DCHECK_NOTREACHED();
1017       return true;
1018   }
1019   // The language allows the enum to have a non-enumerator
1020   // value. Check that this doesn't happen.
1021   RTC_DCHECK_NOTREACHED();
1022   return true;
1023 }
1024 
RuntimeSettingEnqueuer(SwapQueue<RuntimeSetting> * runtime_settings)1025 AudioProcessingImpl::RuntimeSettingEnqueuer::RuntimeSettingEnqueuer(
1026     SwapQueue<RuntimeSetting>* runtime_settings)
1027     : runtime_settings_(*runtime_settings) {
1028   RTC_DCHECK(runtime_settings);
1029 }
1030 
1031 AudioProcessingImpl::RuntimeSettingEnqueuer::~RuntimeSettingEnqueuer() =
1032     default;
1033 
Enqueue(RuntimeSetting setting)1034 bool AudioProcessingImpl::RuntimeSettingEnqueuer::Enqueue(
1035     RuntimeSetting setting) {
1036   const bool successful_insert = runtime_settings_.Insert(&setting);
1037 
1038   if (!successful_insert) {
1039     RTC_LOG(LS_ERROR) << "Cannot enqueue a new runtime setting.";
1040   }
1041   return successful_insert;
1042 }
1043 
MaybeInitializeCapture(const StreamConfig & input_config,const StreamConfig & output_config)1044 void AudioProcessingImpl::MaybeInitializeCapture(
1045     const StreamConfig& input_config,
1046     const StreamConfig& output_config) {
1047   ProcessingConfig processing_config;
1048   bool reinitialization_required = false;
1049   {
1050     // Acquire the capture lock in order to access api_format. The lock is
1051     // released immediately, as we may need to acquire the render lock as part
1052     // of the conditional reinitialization.
1053     MutexLock lock_capture(&mutex_capture_);
1054     processing_config = formats_.api_format;
1055     reinitialization_required = UpdateActiveSubmoduleStates();
1056   }
1057 
1058   if (processing_config.input_stream() != input_config) {
1059     reinitialization_required = true;
1060   }
1061 
1062   if (processing_config.output_stream() != output_config) {
1063     reinitialization_required = true;
1064   }
1065 
1066   if (reinitialization_required) {
1067     MutexLock lock_render(&mutex_render_);
1068     MutexLock lock_capture(&mutex_capture_);
1069     // Reread the API format since the render format may have changed.
1070     processing_config = formats_.api_format;
1071     processing_config.input_stream() = input_config;
1072     processing_config.output_stream() = output_config;
1073     InitializeLocked(processing_config);
1074   }
1075 }
1076 
ProcessStream(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config,float * const * dest)1077 int AudioProcessingImpl::ProcessStream(const float* const* src,
1078                                        const StreamConfig& input_config,
1079                                        const StreamConfig& output_config,
1080                                        float* const* dest) {
1081   TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
1082   DenormalDisabler denormal_disabler(use_denormal_disabler_);
1083   RETURN_ON_ERR(
1084       HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
1085   MaybeInitializeCapture(input_config, output_config);
1086 
1087   MutexLock lock_capture(&mutex_capture_);
1088 
1089   if (aec_dump_) {
1090     RecordUnprocessedCaptureStream(src);
1091   }
1092 
1093   capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
1094   if (capture_.capture_fullband_audio) {
1095     capture_.capture_fullband_audio->CopyFrom(
1096         src, formats_.api_format.input_stream());
1097   }
1098   RETURN_ON_ERR(ProcessCaptureStreamLocked());
1099   if (capture_.capture_fullband_audio) {
1100     capture_.capture_fullband_audio->CopyTo(formats_.api_format.output_stream(),
1101                                             dest);
1102   } else {
1103     capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
1104   }
1105 
1106   if (aec_dump_) {
1107     RecordProcessedCaptureStream(dest);
1108   }
1109   return kNoError;
1110 }
1111 
HandleCaptureRuntimeSettings()1112 void AudioProcessingImpl::HandleCaptureRuntimeSettings() {
1113   RuntimeSetting setting;
1114   int num_settings_processed = 0;
1115   while (capture_runtime_settings_.Remove(&setting)) {
1116     if (aec_dump_) {
1117       aec_dump_->WriteRuntimeSetting(setting);
1118     }
1119     switch (setting.type()) {
1120       case RuntimeSetting::Type::kCapturePreGain:
1121         if (config_.pre_amplifier.enabled ||
1122             config_.capture_level_adjustment.enabled) {
1123           float value;
1124           setting.GetFloat(&value);
1125           // If the pre-amplifier is used, apply the new gain to the
1126           // pre-amplifier regardless if the capture level adjustment is
1127           // activated. This approach allows both functionalities to coexist
1128           // until they have been properly merged.
1129           if (config_.pre_amplifier.enabled) {
1130             config_.pre_amplifier.fixed_gain_factor = value;
1131           } else {
1132             config_.capture_level_adjustment.pre_gain_factor = value;
1133           }
1134 
1135           // Use both the pre-amplifier and the capture level adjustment gains
1136           // as pre-gains.
1137           float gain = 1.f;
1138           if (config_.pre_amplifier.enabled) {
1139             gain *= config_.pre_amplifier.fixed_gain_factor;
1140           }
1141           if (config_.capture_level_adjustment.enabled) {
1142             gain *= config_.capture_level_adjustment.pre_gain_factor;
1143           }
1144 
1145           submodules_.capture_levels_adjuster->SetPreGain(gain);
1146         }
1147         // TODO(bugs.chromium.org/9138): Log setting handling by Aec Dump.
1148         break;
1149       case RuntimeSetting::Type::kCapturePostGain:
1150         if (config_.capture_level_adjustment.enabled) {
1151           float value;
1152           setting.GetFloat(&value);
1153           config_.capture_level_adjustment.post_gain_factor = value;
1154           submodules_.capture_levels_adjuster->SetPostGain(
1155               config_.capture_level_adjustment.post_gain_factor);
1156         }
1157         // TODO(bugs.chromium.org/9138): Log setting handling by Aec Dump.
1158         break;
1159       case RuntimeSetting::Type::kCaptureCompressionGain: {
1160         if (!submodules_.agc_manager &&
1161             !(submodules_.gain_controller2 &&
1162               config_.gain_controller2.input_volume_controller.enabled)) {
1163           float value;
1164           setting.GetFloat(&value);
1165           int int_value = static_cast<int>(value + .5f);
1166           config_.gain_controller1.compression_gain_db = int_value;
1167           if (submodules_.gain_control) {
1168             int error =
1169                 submodules_.gain_control->set_compression_gain_db(int_value);
1170             RTC_DCHECK_EQ(kNoError, error);
1171           }
1172         }
1173         break;
1174       }
1175       case RuntimeSetting::Type::kCaptureFixedPostGain: {
1176         if (submodules_.gain_controller2) {
1177           float value;
1178           setting.GetFloat(&value);
1179           config_.gain_controller2.fixed_digital.gain_db = value;
1180           submodules_.gain_controller2->SetFixedGainDb(value);
1181         }
1182         break;
1183       }
1184       case RuntimeSetting::Type::kPlayoutVolumeChange: {
1185         int value;
1186         setting.GetInt(&value);
1187         capture_.playout_volume = value;
1188         break;
1189       }
1190       case RuntimeSetting::Type::kPlayoutAudioDeviceChange:
1191         RTC_DCHECK_NOTREACHED();
1192         break;
1193       case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
1194         RTC_DCHECK_NOTREACHED();
1195         break;
1196       case RuntimeSetting::Type::kNotSpecified:
1197         RTC_DCHECK_NOTREACHED();
1198         break;
1199       case RuntimeSetting::Type::kCaptureOutputUsed:
1200         bool value;
1201         setting.GetBool(&value);
1202         HandleCaptureOutputUsedSetting(value);
1203         break;
1204     }
1205     ++num_settings_processed;
1206   }
1207 
1208   if (num_settings_processed >= RuntimeSettingQueueSize()) {
1209     // Handle overrun of the runtime settings queue, which likely will has
1210     // caused settings to be discarded.
1211     HandleOverrunInCaptureRuntimeSettingsQueue();
1212   }
1213 }
1214 
HandleOverrunInCaptureRuntimeSettingsQueue()1215 void AudioProcessingImpl::HandleOverrunInCaptureRuntimeSettingsQueue() {
1216   // Fall back to a safe state for the case when a setting for capture output
1217   // usage setting has been missed.
1218   HandleCaptureOutputUsedSetting(/*capture_output_used=*/true);
1219 }
1220 
HandleRenderRuntimeSettings()1221 void AudioProcessingImpl::HandleRenderRuntimeSettings() {
1222   RuntimeSetting setting;
1223   while (render_runtime_settings_.Remove(&setting)) {
1224     if (aec_dump_) {
1225       aec_dump_->WriteRuntimeSetting(setting);
1226     }
1227     switch (setting.type()) {
1228       case RuntimeSetting::Type::kPlayoutAudioDeviceChange:  // fall-through
1229       case RuntimeSetting::Type::kPlayoutVolumeChange:       // fall-through
1230       case RuntimeSetting::Type::kCustomRenderProcessingRuntimeSetting:
1231         if (submodules_.render_pre_processor) {
1232           submodules_.render_pre_processor->SetRuntimeSetting(setting);
1233         }
1234         break;
1235       case RuntimeSetting::Type::kCapturePreGain:          // fall-through
1236       case RuntimeSetting::Type::kCapturePostGain:         // fall-through
1237       case RuntimeSetting::Type::kCaptureCompressionGain:  // fall-through
1238       case RuntimeSetting::Type::kCaptureFixedPostGain:    // fall-through
1239       case RuntimeSetting::Type::kCaptureOutputUsed:       // fall-through
1240       case RuntimeSetting::Type::kNotSpecified:
1241         RTC_DCHECK_NOTREACHED();
1242         break;
1243     }
1244   }
1245 }
1246 
QueueBandedRenderAudio(AudioBuffer * audio)1247 void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) {
1248   RTC_DCHECK_GE(160, audio->num_frames_per_band());
1249 
1250   if (submodules_.echo_control_mobile) {
1251     EchoControlMobileImpl::PackRenderAudioBuffer(audio, num_output_channels(),
1252                                                  num_reverse_channels(),
1253                                                  &aecm_render_queue_buffer_);
1254     RTC_DCHECK(aecm_render_signal_queue_);
1255     // Insert the samples into the queue.
1256     if (!aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_)) {
1257       // The data queue is full and needs to be emptied.
1258       EmptyQueuedRenderAudio();
1259 
1260       // Retry the insert (should always work).
1261       bool result =
1262           aecm_render_signal_queue_->Insert(&aecm_render_queue_buffer_);
1263       RTC_DCHECK(result);
1264     }
1265   }
1266 
1267   if (!submodules_.agc_manager && submodules_.gain_control) {
1268     GainControlImpl::PackRenderAudioBuffer(*audio, &agc_render_queue_buffer_);
1269     // Insert the samples into the queue.
1270     if (!agc_render_signal_queue_->Insert(&agc_render_queue_buffer_)) {
1271       // The data queue is full and needs to be emptied.
1272       EmptyQueuedRenderAudio();
1273 
1274       // Retry the insert (should always work).
1275       bool result = agc_render_signal_queue_->Insert(&agc_render_queue_buffer_);
1276       RTC_DCHECK(result);
1277     }
1278   }
1279 }
1280 
QueueNonbandedRenderAudio(AudioBuffer * audio)1281 void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) {
1282   if (submodules_.echo_detector) {
1283     PackRenderAudioBufferForEchoDetector(*audio, red_render_queue_buffer_);
1284     RTC_DCHECK(red_render_signal_queue_);
1285     // Insert the samples into the queue.
1286     if (!red_render_signal_queue_->Insert(&red_render_queue_buffer_)) {
1287       // The data queue is full and needs to be emptied.
1288       EmptyQueuedRenderAudio();
1289 
1290       // Retry the insert (should always work).
1291       bool result = red_render_signal_queue_->Insert(&red_render_queue_buffer_);
1292       RTC_DCHECK(result);
1293     }
1294   }
1295 }
1296 
AllocateRenderQueue()1297 void AudioProcessingImpl::AllocateRenderQueue() {
1298   const size_t new_agc_render_queue_element_max_size =
1299       std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerBand);
1300 
1301   const size_t new_red_render_queue_element_max_size =
1302       std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
1303 
1304   // Reallocate the queues if the queue item sizes are too small to fit the
1305   // data to put in the queues.
1306 
1307   if (agc_render_queue_element_max_size_ <
1308       new_agc_render_queue_element_max_size) {
1309     agc_render_queue_element_max_size_ = new_agc_render_queue_element_max_size;
1310 
1311     std::vector<int16_t> template_queue_element(
1312         agc_render_queue_element_max_size_);
1313 
1314     agc_render_signal_queue_.reset(
1315         new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
1316             kMaxNumFramesToBuffer, template_queue_element,
1317             RenderQueueItemVerifier<int16_t>(
1318                 agc_render_queue_element_max_size_)));
1319 
1320     agc_render_queue_buffer_.resize(agc_render_queue_element_max_size_);
1321     agc_capture_queue_buffer_.resize(agc_render_queue_element_max_size_);
1322   } else {
1323     agc_render_signal_queue_->Clear();
1324   }
1325 
1326   if (submodules_.echo_detector) {
1327     if (red_render_queue_element_max_size_ <
1328         new_red_render_queue_element_max_size) {
1329       red_render_queue_element_max_size_ =
1330           new_red_render_queue_element_max_size;
1331 
1332       std::vector<float> template_queue_element(
1333           red_render_queue_element_max_size_);
1334 
1335       red_render_signal_queue_.reset(
1336           new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>(
1337               kMaxNumFramesToBuffer, template_queue_element,
1338               RenderQueueItemVerifier<float>(
1339                   red_render_queue_element_max_size_)));
1340 
1341       red_render_queue_buffer_.resize(red_render_queue_element_max_size_);
1342       red_capture_queue_buffer_.resize(red_render_queue_element_max_size_);
1343     } else {
1344       red_render_signal_queue_->Clear();
1345     }
1346   }
1347 }
1348 
EmptyQueuedRenderAudio()1349 void AudioProcessingImpl::EmptyQueuedRenderAudio() {
1350   MutexLock lock_capture(&mutex_capture_);
1351   EmptyQueuedRenderAudioLocked();
1352 }
1353 
EmptyQueuedRenderAudioLocked()1354 void AudioProcessingImpl::EmptyQueuedRenderAudioLocked() {
1355   if (submodules_.echo_control_mobile) {
1356     RTC_DCHECK(aecm_render_signal_queue_);
1357     while (aecm_render_signal_queue_->Remove(&aecm_capture_queue_buffer_)) {
1358       submodules_.echo_control_mobile->ProcessRenderAudio(
1359           aecm_capture_queue_buffer_);
1360     }
1361   }
1362 
1363   if (submodules_.gain_control) {
1364     while (agc_render_signal_queue_->Remove(&agc_capture_queue_buffer_)) {
1365       submodules_.gain_control->ProcessRenderAudio(agc_capture_queue_buffer_);
1366     }
1367   }
1368 
1369   if (submodules_.echo_detector) {
1370     while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) {
1371       submodules_.echo_detector->AnalyzeRenderAudio(red_capture_queue_buffer_);
1372     }
1373   }
1374 }
1375 
ProcessStream(const int16_t * const src,const StreamConfig & input_config,const StreamConfig & output_config,int16_t * const dest)1376 int AudioProcessingImpl::ProcessStream(const int16_t* const src,
1377                                        const StreamConfig& input_config,
1378                                        const StreamConfig& output_config,
1379                                        int16_t* const dest) {
1380   TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
1381 
1382   RETURN_ON_ERR(
1383       HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
1384   MaybeInitializeCapture(input_config, output_config);
1385 
1386   MutexLock lock_capture(&mutex_capture_);
1387   DenormalDisabler denormal_disabler(use_denormal_disabler_);
1388 
1389   if (aec_dump_) {
1390     RecordUnprocessedCaptureStream(src, input_config);
1391   }
1392 
1393   capture_.capture_audio->CopyFrom(src, input_config);
1394   if (capture_.capture_fullband_audio) {
1395     capture_.capture_fullband_audio->CopyFrom(src, input_config);
1396   }
1397   RETURN_ON_ERR(ProcessCaptureStreamLocked());
1398   if (submodule_states_.CaptureMultiBandProcessingPresent() ||
1399       submodule_states_.CaptureFullBandProcessingActive()) {
1400     if (capture_.capture_fullband_audio) {
1401       capture_.capture_fullband_audio->CopyTo(output_config, dest);
1402     } else {
1403       capture_.capture_audio->CopyTo(output_config, dest);
1404     }
1405   }
1406 
1407   if (aec_dump_) {
1408     RecordProcessedCaptureStream(dest, output_config);
1409   }
1410   return kNoError;
1411 }
1412 
ProcessCaptureStreamLocked()1413 int AudioProcessingImpl::ProcessCaptureStreamLocked() {
1414   EmptyQueuedRenderAudioLocked();
1415   HandleCaptureRuntimeSettings();
1416   DenormalDisabler denormal_disabler(use_denormal_disabler_);
1417 
1418   // Ensure that not both the AEC and AECM are active at the same time.
1419   // TODO(peah): Simplify once the public API Enable functions for these
1420   // are moved to APM.
1421   RTC_DCHECK_LE(
1422       !!submodules_.echo_controller + !!submodules_.echo_control_mobile, 1);
1423 
1424   data_dumper_->DumpRaw(
1425       "applied_input_volume",
1426       capture_.applied_input_volume.value_or(kUnspecifiedDataDumpInputVolume));
1427 
1428   AudioBuffer* capture_buffer = capture_.capture_audio.get();  // For brevity.
1429   AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get();
1430 
1431   if (submodules_.high_pass_filter &&
1432       config_.high_pass_filter.apply_in_full_band &&
1433       !constants_.enforce_split_band_hpf) {
1434     submodules_.high_pass_filter->Process(capture_buffer,
1435                                           /*use_split_band_data=*/false);
1436   }
1437 
1438   if (submodules_.capture_levels_adjuster) {
1439     if (config_.capture_level_adjustment.analog_mic_gain_emulation.enabled) {
1440       // When the input volume is emulated, retrieve the volume applied to the
1441       // input audio and notify that to APM so that the volume is passed to the
1442       // active AGC.
1443       set_stream_analog_level_locked(
1444           submodules_.capture_levels_adjuster->GetAnalogMicGainLevel());
1445     }
1446     submodules_.capture_levels_adjuster->ApplyPreLevelAdjustment(
1447         *capture_buffer);
1448   }
1449 
1450   capture_input_rms_.Analyze(rtc::ArrayView<const float>(
1451       capture_buffer->channels_const()[0],
1452       capture_nonlocked_.capture_processing_format.num_frames()));
1453   const bool log_rms = ++capture_rms_interval_counter_ >= 1000;
1454   if (log_rms) {
1455     capture_rms_interval_counter_ = 0;
1456     RmsLevel::Levels levels = capture_input_rms_.AverageAndPeak();
1457     RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelAverageRms",
1458                                 levels.average, 1, RmsLevel::kMinLevelDb, 64);
1459     RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureInputLevelPeakRms",
1460                                 levels.peak, 1, RmsLevel::kMinLevelDb, 64);
1461   }
1462 
1463   if (capture_.applied_input_volume.has_value()) {
1464     applied_input_volume_stats_reporter_.UpdateStatistics(
1465         *capture_.applied_input_volume);
1466   }
1467 
1468   if (submodules_.echo_controller) {
1469     // Determine if the echo path gain has changed by checking all the gains
1470     // applied before AEC.
1471     capture_.echo_path_gain_change = capture_.applied_input_volume_changed;
1472 
1473     // Detect and flag any change in the capture level adjustment pre-gain.
1474     if (submodules_.capture_levels_adjuster) {
1475       float pre_adjustment_gain =
1476           submodules_.capture_levels_adjuster->GetPreAdjustmentGain();
1477       capture_.echo_path_gain_change =
1478           capture_.echo_path_gain_change ||
1479           (capture_.prev_pre_adjustment_gain != pre_adjustment_gain &&
1480            capture_.prev_pre_adjustment_gain >= 0.0f);
1481       capture_.prev_pre_adjustment_gain = pre_adjustment_gain;
1482     }
1483 
1484     // Detect volume change.
1485     capture_.echo_path_gain_change =
1486         capture_.echo_path_gain_change ||
1487         (capture_.prev_playout_volume != capture_.playout_volume &&
1488          capture_.prev_playout_volume >= 0);
1489     capture_.prev_playout_volume = capture_.playout_volume;
1490 
1491     submodules_.echo_controller->AnalyzeCapture(capture_buffer);
1492   }
1493 
1494   if (submodules_.agc_manager) {
1495     submodules_.agc_manager->AnalyzePreProcess(*capture_buffer);
1496   }
1497 
1498   if (submodules_.gain_controller2 &&
1499       config_.gain_controller2.input_volume_controller.enabled) {
1500     // Expect the volume to be available if the input controller is enabled.
1501     RTC_DCHECK(capture_.applied_input_volume.has_value());
1502     if (capture_.applied_input_volume.has_value()) {
1503       submodules_.gain_controller2->Analyze(*capture_.applied_input_volume,
1504                                             *capture_buffer);
1505     }
1506   }
1507 
1508   if (submodule_states_.CaptureMultiBandSubModulesActive() &&
1509       SampleRateSupportsMultiBand(
1510           capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
1511     capture_buffer->SplitIntoFrequencyBands();
1512   }
1513 
1514   const bool multi_channel_capture = config_.pipeline.multi_channel_capture &&
1515                                      constants_.multi_channel_capture_support;
1516   if (submodules_.echo_controller && !multi_channel_capture) {
1517     // Force down-mixing of the number of channels after the detection of
1518     // capture signal saturation.
1519     // TODO(peah): Look into ensuring that this kind of tampering with the
1520     // AudioBuffer functionality should not be needed.
1521     capture_buffer->set_num_channels(1);
1522   }
1523 
1524   if (submodules_.high_pass_filter &&
1525       (!config_.high_pass_filter.apply_in_full_band ||
1526        constants_.enforce_split_band_hpf)) {
1527     submodules_.high_pass_filter->Process(capture_buffer,
1528                                           /*use_split_band_data=*/true);
1529   }
1530 
1531   if (submodules_.gain_control) {
1532     RETURN_ON_ERR(
1533         submodules_.gain_control->AnalyzeCaptureAudio(*capture_buffer));
1534   }
1535 
1536   if ((!config_.noise_suppression.analyze_linear_aec_output_when_available ||
1537        !linear_aec_buffer || submodules_.echo_control_mobile) &&
1538       submodules_.noise_suppressor) {
1539     submodules_.noise_suppressor->Analyze(*capture_buffer);
1540   }
1541 
1542   if (submodules_.echo_control_mobile) {
1543     // Ensure that the stream delay was set before the call to the
1544     // AECM ProcessCaptureAudio function.
1545     if (!capture_.was_stream_delay_set) {
1546       return AudioProcessing::kStreamParameterNotSetError;
1547     }
1548 
1549     if (submodules_.noise_suppressor) {
1550       submodules_.noise_suppressor->Process(capture_buffer);
1551     }
1552 
1553     RETURN_ON_ERR(submodules_.echo_control_mobile->ProcessCaptureAudio(
1554         capture_buffer, stream_delay_ms()));
1555   } else {
1556     if (submodules_.echo_controller) {
1557       data_dumper_->DumpRaw("stream_delay", stream_delay_ms());
1558 
1559       if (capture_.was_stream_delay_set) {
1560         submodules_.echo_controller->SetAudioBufferDelay(stream_delay_ms());
1561       }
1562 
1563       submodules_.echo_controller->ProcessCapture(
1564           capture_buffer, linear_aec_buffer, capture_.echo_path_gain_change);
1565     }
1566 
1567     if (config_.noise_suppression.analyze_linear_aec_output_when_available &&
1568         linear_aec_buffer && submodules_.noise_suppressor) {
1569       submodules_.noise_suppressor->Analyze(*linear_aec_buffer);
1570     }
1571 
1572     if (submodules_.noise_suppressor) {
1573       submodules_.noise_suppressor->Process(capture_buffer);
1574     }
1575   }
1576 
1577   if (submodules_.agc_manager) {
1578     submodules_.agc_manager->Process(*capture_buffer);
1579 
1580     absl::optional<int> new_digital_gain =
1581         submodules_.agc_manager->GetDigitalComressionGain();
1582     if (new_digital_gain && submodules_.gain_control) {
1583       submodules_.gain_control->set_compression_gain_db(*new_digital_gain);
1584     }
1585   }
1586 
1587   if (submodules_.gain_control) {
1588     // TODO(peah): Add reporting from AEC3 whether there is echo.
1589     RETURN_ON_ERR(submodules_.gain_control->ProcessCaptureAudio(
1590         capture_buffer, /*stream_has_echo*/ false));
1591   }
1592 
1593   if (submodule_states_.CaptureMultiBandProcessingPresent() &&
1594       SampleRateSupportsMultiBand(
1595           capture_nonlocked_.capture_processing_format.sample_rate_hz())) {
1596     capture_buffer->MergeFrequencyBands();
1597   }
1598 
1599   if (capture_.capture_output_used) {
1600     if (capture_.capture_fullband_audio) {
1601       const auto& ec = submodules_.echo_controller;
1602       bool ec_active = ec ? ec->ActiveProcessing() : false;
1603       // Only update the fullband buffer if the multiband processing has changed
1604       // the signal. Keep the original signal otherwise.
1605       if (submodule_states_.CaptureMultiBandProcessingActive(ec_active)) {
1606         capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
1607       }
1608       capture_buffer = capture_.capture_fullband_audio.get();
1609     }
1610 
1611     if (submodules_.echo_detector) {
1612       submodules_.echo_detector->AnalyzeCaptureAudio(
1613           rtc::ArrayView<const float>(capture_buffer->channels()[0],
1614                                       capture_buffer->num_frames()));
1615     }
1616 
1617     absl::optional<float> voice_probability;
1618     if (!!submodules_.voice_activity_detector) {
1619       voice_probability = submodules_.voice_activity_detector->Analyze(
1620           AudioFrameView<const float>(capture_buffer->channels(),
1621                                       capture_buffer->num_channels(),
1622                                       capture_buffer->num_frames()));
1623     }
1624 
1625     if (submodules_.transient_suppressor) {
1626       float transient_suppressor_voice_probability = 1.0f;
1627       switch (transient_suppressor_vad_mode_) {
1628         case TransientSuppressor::VadMode::kDefault:
1629           if (submodules_.agc_manager) {
1630             transient_suppressor_voice_probability =
1631                 submodules_.agc_manager->voice_probability();
1632           }
1633           break;
1634         case TransientSuppressor::VadMode::kRnnVad:
1635           RTC_DCHECK(voice_probability.has_value());
1636           transient_suppressor_voice_probability = *voice_probability;
1637           break;
1638         case TransientSuppressor::VadMode::kNoVad:
1639           // The transient suppressor will ignore `voice_probability`.
1640           break;
1641       }
1642       float delayed_voice_probability =
1643           submodules_.transient_suppressor->Suppress(
1644               capture_buffer->channels()[0], capture_buffer->num_frames(),
1645               capture_buffer->num_channels(),
1646               capture_buffer->split_bands_const(0)[kBand0To8kHz],
1647               capture_buffer->num_frames_per_band(),
1648               /*reference_data=*/nullptr, /*reference_length=*/0,
1649               transient_suppressor_voice_probability, capture_.key_pressed);
1650       if (voice_probability.has_value()) {
1651         *voice_probability = delayed_voice_probability;
1652       }
1653     }
1654 
1655     // Experimental APM sub-module that analyzes `capture_buffer`.
1656     if (submodules_.capture_analyzer) {
1657       submodules_.capture_analyzer->Analyze(capture_buffer);
1658     }
1659 
1660     if (submodules_.gain_controller2) {
1661       // TODO(bugs.webrtc.org/7494): Let AGC2 detect applied input volume
1662       // changes.
1663       submodules_.gain_controller2->Process(
1664           voice_probability, capture_.applied_input_volume_changed,
1665           capture_buffer);
1666     }
1667 
1668     if (submodules_.capture_post_processor) {
1669       submodules_.capture_post_processor->Process(capture_buffer);
1670     }
1671 
1672     capture_output_rms_.Analyze(rtc::ArrayView<const float>(
1673         capture_buffer->channels_const()[0],
1674         capture_nonlocked_.capture_processing_format.num_frames()));
1675     if (log_rms) {
1676       RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak();
1677       RTC_HISTOGRAM_COUNTS_LINEAR(
1678           "WebRTC.Audio.ApmCaptureOutputLevelAverageRms", levels.average, 1,
1679           RmsLevel::kMinLevelDb, 64);
1680       RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.ApmCaptureOutputLevelPeakRms",
1681                                   levels.peak, 1, RmsLevel::kMinLevelDb, 64);
1682     }
1683 
1684     // Compute echo-detector stats.
1685     if (submodules_.echo_detector) {
1686       auto ed_metrics = submodules_.echo_detector->GetMetrics();
1687       capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood;
1688       capture_.stats.residual_echo_likelihood_recent_max =
1689           ed_metrics.echo_likelihood_recent_max;
1690     }
1691   }
1692 
1693   // Compute echo-controller stats.
1694   if (submodules_.echo_controller) {
1695     auto ec_metrics = submodules_.echo_controller->GetMetrics();
1696     capture_.stats.echo_return_loss = ec_metrics.echo_return_loss;
1697     capture_.stats.echo_return_loss_enhancement =
1698         ec_metrics.echo_return_loss_enhancement;
1699     capture_.stats.delay_ms = ec_metrics.delay_ms;
1700   }
1701 
1702   // Pass stats for reporting.
1703   stats_reporter_.UpdateStatistics(capture_.stats);
1704 
1705   UpdateRecommendedInputVolumeLocked();
1706   if (capture_.recommended_input_volume.has_value()) {
1707     recommended_input_volume_stats_reporter_.UpdateStatistics(
1708         *capture_.recommended_input_volume);
1709   }
1710 
1711   if (submodules_.capture_levels_adjuster) {
1712     submodules_.capture_levels_adjuster->ApplyPostLevelAdjustment(
1713         *capture_buffer);
1714 
1715     if (config_.capture_level_adjustment.analog_mic_gain_emulation.enabled) {
1716       // If the input volume emulation is used, retrieve the recommended input
1717       // volume and set that to emulate the input volume on the next processed
1718       // audio frame.
1719       RTC_DCHECK(capture_.recommended_input_volume.has_value());
1720       submodules_.capture_levels_adjuster->SetAnalogMicGainLevel(
1721           *capture_.recommended_input_volume);
1722     }
1723   }
1724 
1725   // Temporarily set the output to zero after the stream has been unmuted
1726   // (capture output is again used). The purpose of this is to avoid clicks and
1727   // artefacts in the audio that results when the processing again is
1728   // reactivated after unmuting.
1729   if (!capture_.capture_output_used_last_frame &&
1730       capture_.capture_output_used) {
1731     for (size_t ch = 0; ch < capture_buffer->num_channels(); ++ch) {
1732       rtc::ArrayView<float> channel_view(capture_buffer->channels()[ch],
1733                                          capture_buffer->num_frames());
1734       std::fill(channel_view.begin(), channel_view.end(), 0.f);
1735     }
1736   }
1737   capture_.capture_output_used_last_frame = capture_.capture_output_used;
1738 
1739   capture_.was_stream_delay_set = false;
1740 
1741   data_dumper_->DumpRaw("recommended_input_volume",
1742                         capture_.recommended_input_volume.value_or(
1743                             kUnspecifiedDataDumpInputVolume));
1744 
1745   return kNoError;
1746 }
1747 
AnalyzeReverseStream(const float * const * data,const StreamConfig & reverse_config)1748 int AudioProcessingImpl::AnalyzeReverseStream(
1749     const float* const* data,
1750     const StreamConfig& reverse_config) {
1751   TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_StreamConfig");
1752   MutexLock lock(&mutex_render_);
1753   DenormalDisabler denormal_disabler(use_denormal_disabler_);
1754   RTC_DCHECK(data);
1755   for (size_t i = 0; i < reverse_config.num_channels(); ++i) {
1756     RTC_DCHECK(data[i]);
1757   }
1758   RETURN_ON_ERR(
1759       AudioFormatValidityToErrorCode(ValidateAudioFormat(reverse_config)));
1760 
1761   MaybeInitializeRender(reverse_config, reverse_config);
1762   return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config);
1763 }
1764 
ProcessReverseStream(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config,float * const * dest)1765 int AudioProcessingImpl::ProcessReverseStream(const float* const* src,
1766                                               const StreamConfig& input_config,
1767                                               const StreamConfig& output_config,
1768                                               float* const* dest) {
1769   TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
1770   MutexLock lock(&mutex_render_);
1771   DenormalDisabler denormal_disabler(use_denormal_disabler_);
1772   RETURN_ON_ERR(
1773       HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
1774 
1775   MaybeInitializeRender(input_config, output_config);
1776 
1777   RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config));
1778 
1779   if (submodule_states_.RenderMultiBandProcessingActive() ||
1780       submodule_states_.RenderFullBandProcessingActive()) {
1781     render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
1782                                  dest);
1783   } else if (formats_.api_format.reverse_input_stream() !=
1784              formats_.api_format.reverse_output_stream()) {
1785     render_.render_converter->Convert(src, input_config.num_samples(), dest,
1786                                       output_config.num_samples());
1787   } else {
1788     CopyAudioIfNeeded(src, input_config.num_frames(),
1789                       input_config.num_channels(), dest);
1790   }
1791 
1792   return kNoError;
1793 }
1794 
AnalyzeReverseStreamLocked(const float * const * src,const StreamConfig & input_config,const StreamConfig & output_config)1795 int AudioProcessingImpl::AnalyzeReverseStreamLocked(
1796     const float* const* src,
1797     const StreamConfig& input_config,
1798     const StreamConfig& output_config) {
1799   if (aec_dump_) {
1800     const size_t channel_size =
1801         formats_.api_format.reverse_input_stream().num_frames();
1802     const size_t num_channels =
1803         formats_.api_format.reverse_input_stream().num_channels();
1804     aec_dump_->WriteRenderStreamMessage(
1805         AudioFrameView<const float>(src, num_channels, channel_size));
1806   }
1807   render_.render_audio->CopyFrom(src,
1808                                  formats_.api_format.reverse_input_stream());
1809   return ProcessRenderStreamLocked();
1810 }
1811 
ProcessReverseStream(const int16_t * const src,const StreamConfig & input_config,const StreamConfig & output_config,int16_t * const dest)1812 int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
1813                                               const StreamConfig& input_config,
1814                                               const StreamConfig& output_config,
1815                                               int16_t* const dest) {
1816   TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
1817 
1818   MutexLock lock(&mutex_render_);
1819   DenormalDisabler denormal_disabler(use_denormal_disabler_);
1820 
1821   RETURN_ON_ERR(
1822       HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
1823   MaybeInitializeRender(input_config, output_config);
1824 
1825   if (aec_dump_) {
1826     aec_dump_->WriteRenderStreamMessage(src, input_config.num_frames(),
1827                                         input_config.num_channels());
1828   }
1829 
1830   render_.render_audio->CopyFrom(src, input_config);
1831   RETURN_ON_ERR(ProcessRenderStreamLocked());
1832   if (submodule_states_.RenderMultiBandProcessingActive() ||
1833       submodule_states_.RenderFullBandProcessingActive()) {
1834     render_.render_audio->CopyTo(output_config, dest);
1835   }
1836   return kNoError;
1837 }
1838 
ProcessRenderStreamLocked()1839 int AudioProcessingImpl::ProcessRenderStreamLocked() {
1840   AudioBuffer* render_buffer = render_.render_audio.get();  // For brevity.
1841 
1842   HandleRenderRuntimeSettings();
1843   DenormalDisabler denormal_disabler(use_denormal_disabler_);
1844 
1845   if (submodules_.render_pre_processor) {
1846     submodules_.render_pre_processor->Process(render_buffer);
1847   }
1848 
1849   QueueNonbandedRenderAudio(render_buffer);
1850 
1851   if (submodule_states_.RenderMultiBandSubModulesActive() &&
1852       SampleRateSupportsMultiBand(
1853           formats_.render_processing_format.sample_rate_hz())) {
1854     render_buffer->SplitIntoFrequencyBands();
1855   }
1856 
1857   if (submodule_states_.RenderMultiBandSubModulesActive()) {
1858     QueueBandedRenderAudio(render_buffer);
1859   }
1860 
1861   // TODO(peah): Perform the queuing inside QueueRenderAudiuo().
1862   if (submodules_.echo_controller) {
1863     submodules_.echo_controller->AnalyzeRender(render_buffer);
1864   }
1865 
1866   if (submodule_states_.RenderMultiBandProcessingActive() &&
1867       SampleRateSupportsMultiBand(
1868           formats_.render_processing_format.sample_rate_hz())) {
1869     render_buffer->MergeFrequencyBands();
1870   }
1871 
1872   return kNoError;
1873 }
1874 
set_stream_delay_ms(int delay)1875 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
1876   MutexLock lock(&mutex_capture_);
1877   Error retval = kNoError;
1878   capture_.was_stream_delay_set = true;
1879 
1880   if (delay < 0) {
1881     delay = 0;
1882     retval = kBadStreamParameterWarning;
1883   }
1884 
1885   // TODO(ajm): the max is rather arbitrarily chosen; investigate.
1886   if (delay > 500) {
1887     delay = 500;
1888     retval = kBadStreamParameterWarning;
1889   }
1890 
1891   capture_nonlocked_.stream_delay_ms = delay;
1892   return retval;
1893 }
1894 
GetLinearAecOutput(rtc::ArrayView<std::array<float,160>> linear_output) const1895 bool AudioProcessingImpl::GetLinearAecOutput(
1896     rtc::ArrayView<std::array<float, 160>> linear_output) const {
1897   MutexLock lock(&mutex_capture_);
1898   AudioBuffer* linear_aec_buffer = capture_.linear_aec_output.get();
1899 
1900   RTC_DCHECK(linear_aec_buffer);
1901   if (linear_aec_buffer) {
1902     RTC_DCHECK_EQ(1, linear_aec_buffer->num_bands());
1903     RTC_DCHECK_EQ(linear_output.size(), linear_aec_buffer->num_channels());
1904 
1905     for (size_t ch = 0; ch < linear_aec_buffer->num_channels(); ++ch) {
1906       RTC_DCHECK_EQ(linear_output[ch].size(), linear_aec_buffer->num_frames());
1907       rtc::ArrayView<const float> channel_view =
1908           rtc::ArrayView<const float>(linear_aec_buffer->channels_const()[ch],
1909                                       linear_aec_buffer->num_frames());
1910       FloatS16ToFloat(channel_view.data(), channel_view.size(),
1911                       linear_output[ch].data());
1912     }
1913     return true;
1914   }
1915   RTC_LOG(LS_ERROR) << "No linear AEC output available";
1916   RTC_DCHECK_NOTREACHED();
1917   return false;
1918 }
1919 
stream_delay_ms() const1920 int AudioProcessingImpl::stream_delay_ms() const {
1921   // Used as callback from submodules, hence locking is not allowed.
1922   return capture_nonlocked_.stream_delay_ms;
1923 }
1924 
set_stream_key_pressed(bool key_pressed)1925 void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
1926   MutexLock lock(&mutex_capture_);
1927   capture_.key_pressed = key_pressed;
1928 }
1929 
set_stream_analog_level(int level)1930 void AudioProcessingImpl::set_stream_analog_level(int level) {
1931   MutexLock lock_capture(&mutex_capture_);
1932   set_stream_analog_level_locked(level);
1933 }
1934 
set_stream_analog_level_locked(int level)1935 void AudioProcessingImpl::set_stream_analog_level_locked(int level) {
1936   capture_.applied_input_volume_changed =
1937       capture_.applied_input_volume.has_value() &&
1938       *capture_.applied_input_volume != level;
1939   capture_.applied_input_volume = level;
1940 
1941   // Invalidate any previously recommended input volume which will be updated by
1942   // `ProcessStream()`.
1943   capture_.recommended_input_volume = absl::nullopt;
1944 
1945   if (submodules_.agc_manager) {
1946     submodules_.agc_manager->set_stream_analog_level(level);
1947     return;
1948   }
1949 
1950   if (submodules_.gain_control) {
1951     int error = submodules_.gain_control->set_stream_analog_level(level);
1952     RTC_DCHECK_EQ(kNoError, error);
1953     return;
1954   }
1955 }
1956 
recommended_stream_analog_level() const1957 int AudioProcessingImpl::recommended_stream_analog_level() const {
1958   MutexLock lock_capture(&mutex_capture_);
1959   if (!capture_.applied_input_volume.has_value()) {
1960     RTC_LOG(LS_ERROR) << "set_stream_analog_level has not been called";
1961   }
1962   // Input volume to recommend when `set_stream_analog_level()` is not called.
1963   constexpr int kFallBackInputVolume = 255;
1964   // When APM has no input volume to recommend, return the latest applied input
1965   // volume that has been observed in order to possibly produce no input volume
1966   // change. If no applied input volume has been observed, return a fall-back
1967   // value.
1968   return capture_.recommended_input_volume.value_or(
1969       capture_.applied_input_volume.value_or(kFallBackInputVolume));
1970 }
1971 
UpdateRecommendedInputVolumeLocked()1972 void AudioProcessingImpl::UpdateRecommendedInputVolumeLocked() {
1973   if (!capture_.applied_input_volume.has_value()) {
1974     // When `set_stream_analog_level()` is not called, no input level can be
1975     // recommended.
1976     capture_.recommended_input_volume = absl::nullopt;
1977     return;
1978   }
1979 
1980   if (submodules_.agc_manager) {
1981     capture_.recommended_input_volume =
1982         submodules_.agc_manager->recommended_analog_level();
1983     return;
1984   }
1985 
1986   if (submodules_.gain_control) {
1987     capture_.recommended_input_volume =
1988         submodules_.gain_control->stream_analog_level();
1989     return;
1990   }
1991 
1992   if (submodules_.gain_controller2 &&
1993       config_.gain_controller2.input_volume_controller.enabled) {
1994     capture_.recommended_input_volume =
1995         submodules_.gain_controller2->GetRecommendedInputVolume();
1996     return;
1997   }
1998 
1999   capture_.recommended_input_volume = capture_.applied_input_volume;
2000 }
2001 
CreateAndAttachAecDump(absl::string_view file_name,int64_t max_log_size_bytes,rtc::TaskQueue * worker_queue)2002 bool AudioProcessingImpl::CreateAndAttachAecDump(absl::string_view file_name,
2003                                                  int64_t max_log_size_bytes,
2004                                                  rtc::TaskQueue* worker_queue) {
2005   std::unique_ptr<AecDump> aec_dump =
2006       AecDumpFactory::Create(file_name, max_log_size_bytes, worker_queue);
2007   if (!aec_dump) {
2008     return false;
2009   }
2010 
2011   AttachAecDump(std::move(aec_dump));
2012   return true;
2013 }
2014 
CreateAndAttachAecDump(FILE * handle,int64_t max_log_size_bytes,rtc::TaskQueue * worker_queue)2015 bool AudioProcessingImpl::CreateAndAttachAecDump(FILE* handle,
2016                                                  int64_t max_log_size_bytes,
2017                                                  rtc::TaskQueue* worker_queue) {
2018   std::unique_ptr<AecDump> aec_dump =
2019       AecDumpFactory::Create(handle, max_log_size_bytes, worker_queue);
2020   if (!aec_dump) {
2021     return false;
2022   }
2023 
2024   AttachAecDump(std::move(aec_dump));
2025   return true;
2026 }
2027 
AttachAecDump(std::unique_ptr<AecDump> aec_dump)2028 void AudioProcessingImpl::AttachAecDump(std::unique_ptr<AecDump> aec_dump) {
2029   RTC_DCHECK(aec_dump);
2030   MutexLock lock_render(&mutex_render_);
2031   MutexLock lock_capture(&mutex_capture_);
2032 
2033   // The previously attached AecDump will be destroyed with the
2034   // 'aec_dump' parameter, which is after locks are released.
2035   aec_dump_.swap(aec_dump);
2036   WriteAecDumpConfigMessage(true);
2037   aec_dump_->WriteInitMessage(formats_.api_format, rtc::TimeUTCMillis());
2038 }
2039 
DetachAecDump()2040 void AudioProcessingImpl::DetachAecDump() {
2041   // The d-tor of a task-queue based AecDump blocks until all pending
2042   // tasks are done. This construction avoids blocking while holding
2043   // the render and capture locks.
2044   std::unique_ptr<AecDump> aec_dump = nullptr;
2045   {
2046     MutexLock lock_render(&mutex_render_);
2047     MutexLock lock_capture(&mutex_capture_);
2048     aec_dump = std::move(aec_dump_);
2049   }
2050 }
2051 
GetConfig() const2052 AudioProcessing::Config AudioProcessingImpl::GetConfig() const {
2053   MutexLock lock_render(&mutex_render_);
2054   MutexLock lock_capture(&mutex_capture_);
2055   return config_;
2056 }
2057 
UpdateActiveSubmoduleStates()2058 bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
2059   return submodule_states_.Update(
2060       config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile,
2061       !!submodules_.noise_suppressor, !!submodules_.gain_control,
2062       !!submodules_.gain_controller2, !!submodules_.voice_activity_detector,
2063       config_.pre_amplifier.enabled || config_.capture_level_adjustment.enabled,
2064       capture_nonlocked_.echo_controller_enabled,
2065       !!submodules_.transient_suppressor);
2066 }
2067 
InitializeTransientSuppressor()2068 void AudioProcessingImpl::InitializeTransientSuppressor() {
2069   if (config_.transient_suppression.enabled &&
2070       !constants_.transient_suppressor_forced_off) {
2071     // Attempt to create a transient suppressor, if one is not already created.
2072     if (!submodules_.transient_suppressor) {
2073       submodules_.transient_suppressor = CreateTransientSuppressor(
2074           submodule_creation_overrides_, transient_suppressor_vad_mode_,
2075           proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate,
2076           num_proc_channels());
2077       if (!submodules_.transient_suppressor) {
2078         RTC_LOG(LS_WARNING)
2079             << "No transient suppressor created (probably disabled)";
2080       }
2081     } else {
2082       submodules_.transient_suppressor->Initialize(
2083           proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate,
2084           num_proc_channels());
2085     }
2086   } else {
2087     submodules_.transient_suppressor.reset();
2088   }
2089 }
2090 
InitializeHighPassFilter(bool forced_reset)2091 void AudioProcessingImpl::InitializeHighPassFilter(bool forced_reset) {
2092   bool high_pass_filter_needed_by_aec =
2093       config_.echo_canceller.enabled &&
2094       config_.echo_canceller.enforce_high_pass_filtering &&
2095       !config_.echo_canceller.mobile_mode;
2096   if (submodule_states_.HighPassFilteringRequired() ||
2097       high_pass_filter_needed_by_aec) {
2098     bool use_full_band = config_.high_pass_filter.apply_in_full_band &&
2099                          !constants_.enforce_split_band_hpf;
2100     int rate = use_full_band ? proc_fullband_sample_rate_hz()
2101                              : proc_split_sample_rate_hz();
2102     size_t num_channels =
2103         use_full_band ? num_output_channels() : num_proc_channels();
2104 
2105     if (!submodules_.high_pass_filter ||
2106         rate != submodules_.high_pass_filter->sample_rate_hz() ||
2107         forced_reset ||
2108         num_channels != submodules_.high_pass_filter->num_channels()) {
2109       submodules_.high_pass_filter.reset(
2110           new HighPassFilter(rate, num_channels));
2111     }
2112   } else {
2113     submodules_.high_pass_filter.reset();
2114   }
2115 }
2116 
InitializeEchoController()2117 void AudioProcessingImpl::InitializeEchoController() {
2118   bool use_echo_controller =
2119       echo_control_factory_ ||
2120       (config_.echo_canceller.enabled && !config_.echo_canceller.mobile_mode);
2121 
2122   if (use_echo_controller) {
2123     // Create and activate the echo controller.
2124     if (echo_control_factory_) {
2125       submodules_.echo_controller = echo_control_factory_->Create(
2126           proc_sample_rate_hz(), num_reverse_channels(), num_proc_channels());
2127       RTC_DCHECK(submodules_.echo_controller);
2128     } else {
2129       EchoCanceller3Config config;
2130       absl::optional<EchoCanceller3Config> multichannel_config;
2131       if (use_setup_specific_default_aec3_config_) {
2132         multichannel_config = EchoCanceller3::CreateDefaultMultichannelConfig();
2133       }
2134       submodules_.echo_controller = std::make_unique<EchoCanceller3>(
2135           config, multichannel_config, proc_sample_rate_hz(),
2136           num_reverse_channels(), num_proc_channels());
2137     }
2138 
2139     // Setup the storage for returning the linear AEC output.
2140     if (config_.echo_canceller.export_linear_aec_output) {
2141       constexpr int kLinearOutputRateHz = 16000;
2142       capture_.linear_aec_output = std::make_unique<AudioBuffer>(
2143           kLinearOutputRateHz, num_proc_channels(), kLinearOutputRateHz,
2144           num_proc_channels(), kLinearOutputRateHz, num_proc_channels());
2145     } else {
2146       capture_.linear_aec_output.reset();
2147     }
2148 
2149     capture_nonlocked_.echo_controller_enabled = true;
2150 
2151     submodules_.echo_control_mobile.reset();
2152     aecm_render_signal_queue_.reset();
2153     return;
2154   }
2155 
2156   submodules_.echo_controller.reset();
2157   capture_nonlocked_.echo_controller_enabled = false;
2158   capture_.linear_aec_output.reset();
2159 
2160   if (!config_.echo_canceller.enabled) {
2161     submodules_.echo_control_mobile.reset();
2162     aecm_render_signal_queue_.reset();
2163     return;
2164   }
2165 
2166   if (config_.echo_canceller.mobile_mode) {
2167     // Create and activate AECM.
2168     size_t max_element_size =
2169         std::max(static_cast<size_t>(1),
2170                  kMaxAllowedValuesOfSamplesPerBand *
2171                      EchoControlMobileImpl::NumCancellersRequired(
2172                          num_output_channels(), num_reverse_channels()));
2173 
2174     std::vector<int16_t> template_queue_element(max_element_size);
2175 
2176     aecm_render_signal_queue_.reset(
2177         new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>(
2178             kMaxNumFramesToBuffer, template_queue_element,
2179             RenderQueueItemVerifier<int16_t>(max_element_size)));
2180 
2181     aecm_render_queue_buffer_.resize(max_element_size);
2182     aecm_capture_queue_buffer_.resize(max_element_size);
2183 
2184     submodules_.echo_control_mobile.reset(new EchoControlMobileImpl());
2185 
2186     submodules_.echo_control_mobile->Initialize(proc_split_sample_rate_hz(),
2187                                                 num_reverse_channels(),
2188                                                 num_output_channels());
2189     return;
2190   }
2191 
2192   submodules_.echo_control_mobile.reset();
2193   aecm_render_signal_queue_.reset();
2194 }
2195 
InitializeGainController1()2196 void AudioProcessingImpl::InitializeGainController1() {
2197   if (config_.gain_controller2.enabled &&
2198       config_.gain_controller2.input_volume_controller.enabled &&
2199       config_.gain_controller1.enabled &&
2200       (config_.gain_controller1.mode ==
2201            AudioProcessing::Config::GainController1::kAdaptiveAnalog ||
2202        config_.gain_controller1.analog_gain_controller.enabled)) {
2203     RTC_LOG(LS_ERROR) << "APM configuration not valid: "
2204                       << "Multiple input volume controllers enabled.";
2205   }
2206 
2207   if (!config_.gain_controller1.enabled) {
2208     submodules_.agc_manager.reset();
2209     submodules_.gain_control.reset();
2210     return;
2211   }
2212 
2213   RTC_HISTOGRAM_BOOLEAN(
2214       "WebRTC.Audio.GainController.Analog.Enabled",
2215       config_.gain_controller1.analog_gain_controller.enabled);
2216 
2217   if (!submodules_.gain_control) {
2218     submodules_.gain_control.reset(new GainControlImpl());
2219   }
2220 
2221   submodules_.gain_control->Initialize(num_proc_channels(),
2222                                        proc_sample_rate_hz());
2223   if (!config_.gain_controller1.analog_gain_controller.enabled) {
2224     int error = submodules_.gain_control->set_mode(
2225         Agc1ConfigModeToInterfaceMode(config_.gain_controller1.mode));
2226     RTC_DCHECK_EQ(kNoError, error);
2227     error = submodules_.gain_control->set_target_level_dbfs(
2228         config_.gain_controller1.target_level_dbfs);
2229     RTC_DCHECK_EQ(kNoError, error);
2230     error = submodules_.gain_control->set_compression_gain_db(
2231         config_.gain_controller1.compression_gain_db);
2232     RTC_DCHECK_EQ(kNoError, error);
2233     error = submodules_.gain_control->enable_limiter(
2234         config_.gain_controller1.enable_limiter);
2235     RTC_DCHECK_EQ(kNoError, error);
2236     constexpr int kAnalogLevelMinimum = 0;
2237     constexpr int kAnalogLevelMaximum = 255;
2238     error = submodules_.gain_control->set_analog_level_limits(
2239         kAnalogLevelMinimum, kAnalogLevelMaximum);
2240     RTC_DCHECK_EQ(kNoError, error);
2241 
2242     submodules_.agc_manager.reset();
2243     return;
2244   }
2245 
2246   if (!submodules_.agc_manager.get() ||
2247       submodules_.agc_manager->num_channels() !=
2248           static_cast<int>(num_proc_channels())) {
2249     int stream_analog_level = -1;
2250     const bool re_creation = !!submodules_.agc_manager;
2251     if (re_creation) {
2252       stream_analog_level = submodules_.agc_manager->recommended_analog_level();
2253     }
2254     submodules_.agc_manager.reset(new AgcManagerDirect(
2255         num_proc_channels(), config_.gain_controller1.analog_gain_controller));
2256     if (re_creation) {
2257       submodules_.agc_manager->set_stream_analog_level(stream_analog_level);
2258     }
2259   }
2260   submodules_.agc_manager->Initialize();
2261   submodules_.agc_manager->SetupDigitalGainControl(*submodules_.gain_control);
2262   submodules_.agc_manager->HandleCaptureOutputUsedChange(
2263       capture_.capture_output_used);
2264 }
2265 
InitializeGainController2(bool config_has_changed)2266 void AudioProcessingImpl::InitializeGainController2(bool config_has_changed) {
2267   if (!config_has_changed) {
2268     return;
2269   }
2270   if (!config_.gain_controller2.enabled) {
2271     submodules_.gain_controller2.reset();
2272     return;
2273   }
2274   if (!submodules_.gain_controller2 || config_has_changed) {
2275     const bool use_internal_vad =
2276         transient_suppressor_vad_mode_ != TransientSuppressor::VadMode::kRnnVad;
2277     submodules_.gain_controller2 = std::make_unique<GainController2>(
2278         config_.gain_controller2,
2279         input_volume_controller_config_override_.value_or(
2280             InputVolumeController::Config{}),
2281         proc_fullband_sample_rate_hz(), num_input_channels(), use_internal_vad);
2282     submodules_.gain_controller2->SetCaptureOutputUsed(
2283         capture_.capture_output_used);
2284   }
2285 }
2286 
InitializeVoiceActivityDetector(bool config_has_changed)2287 void AudioProcessingImpl::InitializeVoiceActivityDetector(
2288     bool config_has_changed) {
2289   if (!config_has_changed) {
2290     return;
2291   }
2292   const bool use_vad =
2293       transient_suppressor_vad_mode_ == TransientSuppressor::VadMode::kRnnVad &&
2294       config_.gain_controller2.enabled &&
2295       config_.gain_controller2.adaptive_digital.enabled;
2296   if (!use_vad) {
2297     submodules_.voice_activity_detector.reset();
2298     return;
2299   }
2300   if (!submodules_.voice_activity_detector || config_has_changed) {
2301     RTC_DCHECK(!!submodules_.gain_controller2);
2302     // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here.
2303     submodules_.voice_activity_detector =
2304         std::make_unique<VoiceActivityDetectorWrapper>(
2305             config_.gain_controller2.adaptive_digital.vad_reset_period_ms,
2306             submodules_.gain_controller2->GetCpuFeatures(),
2307             proc_fullband_sample_rate_hz());
2308   }
2309 }
2310 
InitializeNoiseSuppressor()2311 void AudioProcessingImpl::InitializeNoiseSuppressor() {
2312   submodules_.noise_suppressor.reset();
2313 
2314   if (config_.noise_suppression.enabled) {
2315     auto map_level =
2316         [](AudioProcessing::Config::NoiseSuppression::Level level) {
2317           using NoiseSuppresionConfig =
2318               AudioProcessing::Config::NoiseSuppression;
2319           switch (level) {
2320             case NoiseSuppresionConfig::kLow:
2321               return NsConfig::SuppressionLevel::k6dB;
2322             case NoiseSuppresionConfig::kModerate:
2323               return NsConfig::SuppressionLevel::k12dB;
2324             case NoiseSuppresionConfig::kHigh:
2325               return NsConfig::SuppressionLevel::k18dB;
2326             case NoiseSuppresionConfig::kVeryHigh:
2327               return NsConfig::SuppressionLevel::k21dB;
2328           }
2329           RTC_CHECK_NOTREACHED();
2330         };
2331 
2332     NsConfig cfg;
2333     cfg.target_level = map_level(config_.noise_suppression.level);
2334     submodules_.noise_suppressor = std::make_unique<NoiseSuppressor>(
2335         cfg, proc_sample_rate_hz(), num_proc_channels());
2336   }
2337 }
2338 
InitializeCaptureLevelsAdjuster()2339 void AudioProcessingImpl::InitializeCaptureLevelsAdjuster() {
2340   if (config_.pre_amplifier.enabled ||
2341       config_.capture_level_adjustment.enabled) {
2342     // Use both the pre-amplifier and the capture level adjustment gains as
2343     // pre-gains.
2344     float pre_gain = 1.f;
2345     if (config_.pre_amplifier.enabled) {
2346       pre_gain *= config_.pre_amplifier.fixed_gain_factor;
2347     }
2348     if (config_.capture_level_adjustment.enabled) {
2349       pre_gain *= config_.capture_level_adjustment.pre_gain_factor;
2350     }
2351 
2352     submodules_.capture_levels_adjuster =
2353         std::make_unique<CaptureLevelsAdjuster>(
2354             config_.capture_level_adjustment.analog_mic_gain_emulation.enabled,
2355             config_.capture_level_adjustment.analog_mic_gain_emulation
2356                 .initial_level,
2357             pre_gain, config_.capture_level_adjustment.post_gain_factor);
2358   } else {
2359     submodules_.capture_levels_adjuster.reset();
2360   }
2361 }
2362 
InitializeResidualEchoDetector()2363 void AudioProcessingImpl::InitializeResidualEchoDetector() {
2364   if (submodules_.echo_detector) {
2365     submodules_.echo_detector->Initialize(
2366         proc_fullband_sample_rate_hz(), 1,
2367         formats_.render_processing_format.sample_rate_hz(), 1);
2368   }
2369 }
2370 
InitializeAnalyzer()2371 void AudioProcessingImpl::InitializeAnalyzer() {
2372   if (submodules_.capture_analyzer) {
2373     submodules_.capture_analyzer->Initialize(proc_fullband_sample_rate_hz(),
2374                                              num_proc_channels());
2375   }
2376 }
2377 
InitializePostProcessor()2378 void AudioProcessingImpl::InitializePostProcessor() {
2379   if (submodules_.capture_post_processor) {
2380     submodules_.capture_post_processor->Initialize(
2381         proc_fullband_sample_rate_hz(), num_proc_channels());
2382   }
2383 }
2384 
InitializePreProcessor()2385 void AudioProcessingImpl::InitializePreProcessor() {
2386   if (submodules_.render_pre_processor) {
2387     submodules_.render_pre_processor->Initialize(
2388         formats_.render_processing_format.sample_rate_hz(),
2389         formats_.render_processing_format.num_channels());
2390   }
2391 }
2392 
WriteAecDumpConfigMessage(bool forced)2393 void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) {
2394   if (!aec_dump_) {
2395     return;
2396   }
2397 
2398   std::string experiments_description = "";
2399   // TODO(peah): Add semicolon-separated concatenations of experiment
2400   // descriptions for other submodules.
2401   if (!!submodules_.capture_post_processor) {
2402     experiments_description += "CapturePostProcessor;";
2403   }
2404   if (!!submodules_.render_pre_processor) {
2405     experiments_description += "RenderPreProcessor;";
2406   }
2407   if (capture_nonlocked_.echo_controller_enabled) {
2408     experiments_description += "EchoController;";
2409   }
2410   if (config_.gain_controller2.enabled) {
2411     experiments_description += "GainController2;";
2412   }
2413 
2414   InternalAPMConfig apm_config;
2415 
2416   apm_config.aec_enabled = config_.echo_canceller.enabled;
2417   apm_config.aec_delay_agnostic_enabled = false;
2418   apm_config.aec_extended_filter_enabled = false;
2419   apm_config.aec_suppression_level = 0;
2420 
2421   apm_config.aecm_enabled = !!submodules_.echo_control_mobile;
2422   apm_config.aecm_comfort_noise_enabled =
2423       submodules_.echo_control_mobile &&
2424       submodules_.echo_control_mobile->is_comfort_noise_enabled();
2425   apm_config.aecm_routing_mode =
2426       submodules_.echo_control_mobile
2427           ? static_cast<int>(submodules_.echo_control_mobile->routing_mode())
2428           : 0;
2429 
2430   apm_config.agc_enabled = !!submodules_.gain_control;
2431 
2432   apm_config.agc_mode = submodules_.gain_control
2433                             ? static_cast<int>(submodules_.gain_control->mode())
2434                             : GainControl::kAdaptiveAnalog;
2435   apm_config.agc_limiter_enabled =
2436       submodules_.gain_control ? submodules_.gain_control->is_limiter_enabled()
2437                                : false;
2438   apm_config.noise_robust_agc_enabled = !!submodules_.agc_manager;
2439 
2440   apm_config.hpf_enabled = config_.high_pass_filter.enabled;
2441 
2442   apm_config.ns_enabled = config_.noise_suppression.enabled;
2443   apm_config.ns_level = static_cast<int>(config_.noise_suppression.level);
2444 
2445   apm_config.transient_suppression_enabled =
2446       config_.transient_suppression.enabled;
2447   apm_config.experiments_description = experiments_description;
2448   apm_config.pre_amplifier_enabled = config_.pre_amplifier.enabled;
2449   apm_config.pre_amplifier_fixed_gain_factor =
2450       config_.pre_amplifier.fixed_gain_factor;
2451 
2452   if (!forced && apm_config == apm_config_for_aec_dump_) {
2453     return;
2454   }
2455   aec_dump_->WriteConfig(apm_config);
2456   apm_config_for_aec_dump_ = apm_config;
2457 }
2458 
RecordUnprocessedCaptureStream(const float * const * src)2459 void AudioProcessingImpl::RecordUnprocessedCaptureStream(
2460     const float* const* src) {
2461   RTC_DCHECK(aec_dump_);
2462   WriteAecDumpConfigMessage(false);
2463 
2464   const size_t channel_size = formats_.api_format.input_stream().num_frames();
2465   const size_t num_channels = formats_.api_format.input_stream().num_channels();
2466   aec_dump_->AddCaptureStreamInput(
2467       AudioFrameView<const float>(src, num_channels, channel_size));
2468   RecordAudioProcessingState();
2469 }
2470 
RecordUnprocessedCaptureStream(const int16_t * const data,const StreamConfig & config)2471 void AudioProcessingImpl::RecordUnprocessedCaptureStream(
2472     const int16_t* const data,
2473     const StreamConfig& config) {
2474   RTC_DCHECK(aec_dump_);
2475   WriteAecDumpConfigMessage(false);
2476 
2477   aec_dump_->AddCaptureStreamInput(data, config.num_channels(),
2478                                    config.num_frames());
2479   RecordAudioProcessingState();
2480 }
2481 
RecordProcessedCaptureStream(const float * const * processed_capture_stream)2482 void AudioProcessingImpl::RecordProcessedCaptureStream(
2483     const float* const* processed_capture_stream) {
2484   RTC_DCHECK(aec_dump_);
2485 
2486   const size_t channel_size = formats_.api_format.output_stream().num_frames();
2487   const size_t num_channels =
2488       formats_.api_format.output_stream().num_channels();
2489   aec_dump_->AddCaptureStreamOutput(AudioFrameView<const float>(
2490       processed_capture_stream, num_channels, channel_size));
2491   aec_dump_->WriteCaptureStreamMessage();
2492 }
2493 
RecordProcessedCaptureStream(const int16_t * const data,const StreamConfig & config)2494 void AudioProcessingImpl::RecordProcessedCaptureStream(
2495     const int16_t* const data,
2496     const StreamConfig& config) {
2497   RTC_DCHECK(aec_dump_);
2498 
2499   aec_dump_->AddCaptureStreamOutput(data, config.num_channels(),
2500                                     config.num_frames());
2501   aec_dump_->WriteCaptureStreamMessage();
2502 }
2503 
RecordAudioProcessingState()2504 void AudioProcessingImpl::RecordAudioProcessingState() {
2505   RTC_DCHECK(aec_dump_);
2506   AecDump::AudioProcessingState audio_proc_state;
2507   audio_proc_state.delay = capture_nonlocked_.stream_delay_ms;
2508   audio_proc_state.drift = 0;
2509   audio_proc_state.applied_input_volume = capture_.applied_input_volume;
2510   audio_proc_state.keypress = capture_.key_pressed;
2511   aec_dump_->AddAudioProcessingState(audio_proc_state);
2512 }
2513 
// Constructs the capture-side state with its start-up values: no stream delay
// set, capture output marked as used (for the current and the previous
// frame), no key press, and no pending echo path gain or applied input volume
// change.
ApmCaptureState()2514 AudioProcessingImpl::ApmCaptureState::ApmCaptureState()
2515     : was_stream_delay_set(false),
2516       capture_output_used(true),
2517       capture_output_used_last_frame(true),
2518       key_pressed(false),
// The processing format and the split rate both start from kSampleRate16kHz.
2519       capture_processing_format(kSampleRate16kHz),
2520       split_rate(kSampleRate16kHz),
2521       echo_path_gain_change(false),
// NOTE(review): the -1 values below look like "not set yet" sentinels -
// confirm against the code that reads these members.
2522       prev_pre_adjustment_gain(-1.0f),
2523       playout_volume(-1),
2524       prev_playout_volume(-1),
2525       applied_input_volume_changed(false) {}
2526 
// The destructor is compiler-generated; it is defined out of line here.
2527 AudioProcessingImpl::ApmCaptureState::~ApmCaptureState() = default;
2528 
// The render-side state uses the compiler-generated constructor and
// destructor; both are defined out of line here.
2529 AudioProcessingImpl::ApmRenderState::ApmRenderState() = default;
2530 
2531 AudioProcessingImpl::ApmRenderState::~ApmRenderState() = default;
2532 
// Constructs the stats reporter. The stats message queue is constructed with
// the argument 1 (presumably a capacity of a single element - confirm against
// the queue type's constructor).
ApmStatsReporter()2533 AudioProcessingImpl::ApmStatsReporter::ApmStatsReporter()
2534     : stats_message_queue_(1) {}
2535 
// The destructor is compiler-generated; it is defined out of line here.
2536 AudioProcessingImpl::ApmStatsReporter::~ApmStatsReporter() = default;
2537 
GetStatistics()2538 AudioProcessingStats AudioProcessingImpl::ApmStatsReporter::GetStatistics() {
2539   MutexLock lock_stats(&mutex_stats_);
2540   bool new_stats_available = stats_message_queue_.Remove(&cached_stats_);
2541   // If the message queue is full, return the cached stats.
2542   static_cast<void>(new_stats_available);
2543 
2544   return cached_stats_;
2545 }
2546 
UpdateStatistics(const AudioProcessingStats & new_stats)2547 void AudioProcessingImpl::ApmStatsReporter::UpdateStatistics(
2548     const AudioProcessingStats& new_stats) {
2549   AudioProcessingStats stats_to_queue = new_stats;
2550   bool stats_message_passed = stats_message_queue_.Insert(&stats_to_queue);
2551   // If the message queue is full, discard the new stats.
2552   static_cast<void>(stats_message_passed);
2553 }
2554 
2555 }  // namespace webrtc
2556