• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/aec3/residual_echo_estimator.h"
12 
13 #include <stddef.h>
14 
15 #include <algorithm>
16 #include <vector>
17 
18 #include "api/array_view.h"
19 #include "modules/audio_processing/aec3/reverb_model.h"
20 #include "rtc_base/checks.h"
21 #include "system_wrappers/include/field_trial.h"
22 
23 namespace webrtc {
24 namespace {
25 
UseLowEarlyReflectionsTransparentModeGain()26 bool UseLowEarlyReflectionsTransparentModeGain() {
27   return field_trial::IsEnabled(
28       "WebRTC-Aec3UseLowEarlyReflectionsTransparentModeGain");
29 }
30 
UseLowLateReflectionsTransparentModeGain()31 bool UseLowLateReflectionsTransparentModeGain() {
32   return field_trial::IsEnabled(
33       "WebRTC-Aec3UseLowLateReflectionsTransparentModeGain");
34 }
35 
UseLowEarlyReflectionsDefaultGain()36 bool UseLowEarlyReflectionsDefaultGain() {
37   return field_trial::IsEnabled("WebRTC-Aec3UseLowEarlyReflectionsDefaultGain");
38 }
39 
UseLowLateReflectionsDefaultGain()40 bool UseLowLateReflectionsDefaultGain() {
41   return field_trial::IsEnabled("WebRTC-Aec3UseLowLateReflectionsDefaultGain");
42 }
43 
ModelReverbInNonlinearMode()44 bool ModelReverbInNonlinearMode() {
45   return !field_trial::IsEnabled("WebRTC-Aec3rNonlinearModeReverbKillSwitch");
46 }
47 
// Gain amplitude returned by the transparent-mode gain getters when no
// low-gain field trial overrides it.
constexpr float kDefaultTransparentModeGain{0.01f};
49 
GetEarlyReflectionsTransparentModeGain()50 float GetEarlyReflectionsTransparentModeGain() {
51   if (UseLowEarlyReflectionsTransparentModeGain()) {
52     return 0.001f;
53   }
54   return kDefaultTransparentModeGain;
55 }
56 
GetLateReflectionsTransparentModeGain()57 float GetLateReflectionsTransparentModeGain() {
58   if (UseLowLateReflectionsTransparentModeGain()) {
59     return 0.001f;
60   }
61 
62   return kDefaultTransparentModeGain;
63 }
64 
GetEarlyReflectionsDefaultModeGain(const EchoCanceller3Config::EpStrength & config)65 float GetEarlyReflectionsDefaultModeGain(
66     const EchoCanceller3Config::EpStrength& config) {
67   if (UseLowEarlyReflectionsDefaultGain()) {
68     return 0.1f;
69   }
70 
71   return config.default_gain;
72 }
73 
GetLateReflectionsDefaultModeGain(const EchoCanceller3Config::EpStrength & config)74 float GetLateReflectionsDefaultModeGain(
75     const EchoCanceller3Config::EpStrength& config) {
76   if (UseLowLateReflectionsDefaultGain()) {
77     return 0.1f;
78   }
79   return config.default_gain;
80 }
81 
82 // Computes the indexes that will be used for computing spectral power over
83 // the blocks surrounding the delay.
GetRenderIndexesToAnalyze(const SpectrumBuffer & spectrum_buffer,const EchoCanceller3Config::EchoModel & echo_model,int filter_delay_blocks,int * idx_start,int * idx_stop)84 void GetRenderIndexesToAnalyze(
85     const SpectrumBuffer& spectrum_buffer,
86     const EchoCanceller3Config::EchoModel& echo_model,
87     int filter_delay_blocks,
88     int* idx_start,
89     int* idx_stop) {
90   RTC_DCHECK(idx_start);
91   RTC_DCHECK(idx_stop);
92   size_t window_start;
93   size_t window_end;
94   window_start =
95       std::max(0, filter_delay_blocks -
96                       static_cast<int>(echo_model.render_pre_window_size));
97   window_end = filter_delay_blocks +
98                static_cast<int>(echo_model.render_post_window_size);
99   *idx_start = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_start);
100   *idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1);
101 }
102 
103 // Estimates the residual echo power based on the echo return loss enhancement
104 // (ERLE) and the linear power estimate.
LinearEstimate(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> S2_linear,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> erle,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)105 void LinearEstimate(
106     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
107     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
108     rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
109   RTC_DCHECK_EQ(S2_linear.size(), erle.size());
110   RTC_DCHECK_EQ(S2_linear.size(), R2.size());
111 
112   const size_t num_capture_channels = R2.size();
113   for (size_t ch = 0; ch < num_capture_channels; ++ch) {
114     for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
115       RTC_DCHECK_LT(0.f, erle[ch][k]);
116       R2[ch][k] = S2_linear[ch][k] / erle[ch][k];
117     }
118   }
119 }
120 
121 // Estimates the residual echo power based on an uncertainty estimate of the
122 // echo return loss enhancement (ERLE) and the linear power estimate.
LinearEstimate(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> S2_linear,float erle_uncertainty,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)123 void LinearEstimate(
124     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
125     float erle_uncertainty,
126     rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
127   RTC_DCHECK_EQ(S2_linear.size(), R2.size());
128 
129   const size_t num_capture_channels = R2.size();
130   for (size_t ch = 0; ch < num_capture_channels; ++ch) {
131     for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
132       R2[ch][k] = S2_linear[ch][k] * erle_uncertainty;
133     }
134   }
135 }
136 
137 // Estimates the residual echo power based on the estimate of the echo path
138 // gain.
NonLinearEstimate(float echo_path_gain,const std::array<float,kFftLengthBy2Plus1> & X2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)139 void NonLinearEstimate(
140     float echo_path_gain,
141     const std::array<float, kFftLengthBy2Plus1>& X2,
142     rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
143   const size_t num_capture_channels = R2.size();
144   for (size_t ch = 0; ch < num_capture_channels; ++ch) {
145     for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
146       R2[ch][k] = X2[k] * echo_path_gain;
147     }
148   }
149 }
150 
151 // Applies a soft noise gate to the echo generating power.
ApplyNoiseGate(const EchoCanceller3Config::EchoModel & config,rtc::ArrayView<float,kFftLengthBy2Plus1> X2)152 void ApplyNoiseGate(const EchoCanceller3Config::EchoModel& config,
153                     rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
154   for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
155     if (config.noise_gate_power > X2[k]) {
156       X2[k] = std::max(0.f, X2[k] - config.noise_gate_slope *
157                                         (config.noise_gate_power - X2[k]));
158     }
159   }
160 }
161 
162 // Estimates the echo generating signal power as gated maximal power over a
163 // time window.
EchoGeneratingPower(size_t num_render_channels,const SpectrumBuffer & spectrum_buffer,const EchoCanceller3Config::EchoModel & echo_model,int filter_delay_blocks,rtc::ArrayView<float,kFftLengthBy2Plus1> X2)164 void EchoGeneratingPower(size_t num_render_channels,
165                          const SpectrumBuffer& spectrum_buffer,
166                          const EchoCanceller3Config::EchoModel& echo_model,
167                          int filter_delay_blocks,
168                          rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
169   int idx_stop;
170   int idx_start;
171   GetRenderIndexesToAnalyze(spectrum_buffer, echo_model, filter_delay_blocks,
172                             &idx_start, &idx_stop);
173 
174   std::fill(X2.begin(), X2.end(), 0.f);
175   if (num_render_channels == 1) {
176     for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
177       for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
178         X2[j] = std::max(X2[j], spectrum_buffer.buffer[k][/*channel=*/0][j]);
179       }
180     }
181   } else {
182     for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
183       std::array<float, kFftLengthBy2Plus1> render_power;
184       render_power.fill(0.f);
185       for (size_t ch = 0; ch < num_render_channels; ++ch) {
186         const auto& channel_power = spectrum_buffer.buffer[k][ch];
187         for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
188           render_power[j] += channel_power[j];
189         }
190       }
191       for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
192         X2[j] = std::max(X2[j], render_power[j]);
193       }
194     }
195   }
196 }
197 
198 }  // namespace
199 
// Constructs the estimator from `config` for `num_render_channels` render
// channels. The reflection gains and the nonlinear-mode reverb switch are
// resolved once here through the file-local helper functions, after which the
// estimator state is initialized via Reset().
ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config,
                                             size_t num_render_channels)
    : config_(config),
      num_render_channels_(num_render_channels),
      early_reflections_transparent_mode_gain_(
          GetEarlyReflectionsTransparentModeGain()),
      late_reflections_transparent_mode_gain_(
          GetLateReflectionsTransparentModeGain()),
      // NOTE(review): the two initializers below read the `config_` member
      // rather than the `config` parameter, so they presumably rely on
      // `config_` being declared before these members - confirm in the header.
      early_reflections_general_gain_(
          GetEarlyReflectionsDefaultModeGain(config_.ep_strength)),
      late_reflections_general_gain_(
          GetLateReflectionsDefaultModeGain(config_.ep_strength)),
      model_reverb_in_nonlinear_mode_(ModelReverbInNonlinearMode()) {
  Reset();
}
215 
// Defaulted destructor, defined out of line in this translation unit.
ResidualEchoEstimator::~ResidualEchoEstimator() = default;
217 
Estimate(const AecState & aec_state,const RenderBuffer & render_buffer,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> S2_linear,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> Y2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)218 void ResidualEchoEstimator::Estimate(
219     const AecState& aec_state,
220     const RenderBuffer& render_buffer,
221     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
222     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
223     rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
224   RTC_DCHECK_EQ(R2.size(), Y2.size());
225   RTC_DCHECK_EQ(R2.size(), S2_linear.size());
226 
227   const size_t num_capture_channels = R2.size();
228 
229   // Estimate the power of the stationary noise in the render signal.
230   UpdateRenderNoisePower(render_buffer);
231 
232   // Estimate the residual echo power.
233   if (aec_state.UsableLinearEstimate()) {
234     // When there is saturated echo, assume the same spectral content as is
235     // present in the microphone signal.
236     if (aec_state.SaturatedEcho()) {
237       for (size_t ch = 0; ch < num_capture_channels; ++ch) {
238         std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
239       }
240     } else {
241       absl::optional<float> erle_uncertainty = aec_state.ErleUncertainty();
242       if (erle_uncertainty) {
243         LinearEstimate(S2_linear, *erle_uncertainty, R2);
244       } else {
245         LinearEstimate(S2_linear, aec_state.Erle(), R2);
246       }
247     }
248 
249     AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2);
250   } else {
251     const float echo_path_gain =
252         GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true);
253 
254     // When there is saturated echo, assume the same spectral content as is
255     // present in the microphone signal.
256     if (aec_state.SaturatedEcho()) {
257       for (size_t ch = 0; ch < num_capture_channels; ++ch) {
258         std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
259       }
260     } else {
261       // Estimate the echo generating signal power.
262       std::array<float, kFftLengthBy2Plus1> X2;
263       EchoGeneratingPower(num_render_channels_,
264                           render_buffer.GetSpectrumBuffer(), config_.echo_model,
265                           aec_state.MinDirectPathFilterDelay(), X2);
266       if (!aec_state.UseStationarityProperties()) {
267         ApplyNoiseGate(config_.echo_model, X2);
268       }
269 
270       // Subtract the stationary noise power to avoid stationary noise causing
271       // excessive echo suppression.
272       for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
273         X2[k] -= config_.echo_model.stationary_gate_slope * X2_noise_floor_[k];
274         X2[k] = std::max(0.f, X2[k]);
275       }
276 
277       NonLinearEstimate(echo_path_gain, X2, R2);
278     }
279 
280     if (model_reverb_in_nonlinear_mode_ && !aec_state.TransparentMode()) {
281       AddReverb(ReverbType::kNonLinear, aec_state, render_buffer, R2);
282     }
283   }
284 
285   if (aec_state.UseStationarityProperties()) {
286     // Scale the echo according to echo audibility.
287     std::array<float, kFftLengthBy2Plus1> residual_scaling;
288     aec_state.GetResidualEchoScaling(residual_scaling);
289     for (size_t ch = 0; ch < num_capture_channels; ++ch) {
290       for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
291         R2[ch][k] *= residual_scaling[k];
292       }
293     }
294   }
295 }
296 
Reset()297 void ResidualEchoEstimator::Reset() {
298   echo_reverb_.Reset();
299   X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold);
300   X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power);
301 }
302 
UpdateRenderNoisePower(const RenderBuffer & render_buffer)303 void ResidualEchoEstimator::UpdateRenderNoisePower(
304     const RenderBuffer& render_buffer) {
305   std::array<float, kFftLengthBy2Plus1> render_power_data;
306   rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
307       render_buffer.Spectrum(0);
308   rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
309       X2[/*channel=*/0];
310   if (num_render_channels_ > 1) {
311     render_power_data.fill(0.f);
312     for (size_t ch = 0; ch < num_render_channels_; ++ch) {
313       const auto& channel_power = X2[ch];
314       for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
315         render_power_data[k] += channel_power[k];
316       }
317     }
318     render_power = render_power_data;
319   }
320 
321   // Estimate the stationary noise power in a minimum statistics manner.
322   for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
323     // Decrease rapidly.
324     if (render_power[k] < X2_noise_floor_[k]) {
325       X2_noise_floor_[k] = render_power[k];
326       X2_noise_floor_counter_[k] = 0;
327     } else {
328       // Increase in a delayed, leaky manner.
329       if (X2_noise_floor_counter_[k] >=
330           static_cast<int>(config_.echo_model.noise_floor_hold)) {
331         X2_noise_floor_[k] = std::max(X2_noise_floor_[k] * 1.1f,
332                                       config_.echo_model.min_noise_floor_power);
333       } else {
334         ++X2_noise_floor_counter_[k];
335       }
336     }
337   }
338 }
339 
340 // Adds the estimated power of the reverb to the residual echo power.
AddReverb(ReverbType reverb_type,const AecState & aec_state,const RenderBuffer & render_buffer,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)341 void ResidualEchoEstimator::AddReverb(
342     ReverbType reverb_type,
343     const AecState& aec_state,
344     const RenderBuffer& render_buffer,
345     rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
346   const size_t num_capture_channels = R2.size();
347 
348   // Choose reverb partition based on what type of echo power model is used.
349   const size_t first_reverb_partition =
350       reverb_type == ReverbType::kLinear
351           ? aec_state.FilterLengthBlocks() + 1
352           : aec_state.MinDirectPathFilterDelay() + 1;
353 
354   // Compute render power for the reverb.
355   std::array<float, kFftLengthBy2Plus1> render_power_data;
356   rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
357       render_buffer.Spectrum(first_reverb_partition);
358   rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
359       X2[/*channel=*/0];
360   if (num_render_channels_ > 1) {
361     render_power_data.fill(0.f);
362     for (size_t ch = 0; ch < num_render_channels_; ++ch) {
363       const auto& channel_power = X2[ch];
364       for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
365         render_power_data[k] += channel_power[k];
366       }
367     }
368     render_power = render_power_data;
369   }
370 
371   // Update the reverb estimate.
372   if (reverb_type == ReverbType::kLinear) {
373     echo_reverb_.UpdateReverb(render_power,
374                               aec_state.GetReverbFrequencyResponse(),
375                               aec_state.ReverbDecay());
376   } else {
377     const float echo_path_gain =
378         GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/false);
379     echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain,
380                                            aec_state.ReverbDecay());
381   }
382 
383   // Add the reverb power.
384   rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
385       echo_reverb_.reverb();
386   for (size_t ch = 0; ch < num_capture_channels; ++ch) {
387     for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
388       R2[ch][k] += reverb_power[k];
389     }
390   }
391 }
392 
393 // Chooses the echo path gain to use.
GetEchoPathGain(const AecState & aec_state,bool gain_for_early_reflections) const394 float ResidualEchoEstimator::GetEchoPathGain(
395     const AecState& aec_state,
396     bool gain_for_early_reflections) const {
397   float gain_amplitude;
398   if (aec_state.TransparentMode()) {
399     gain_amplitude = gain_for_early_reflections
400                          ? early_reflections_transparent_mode_gain_
401                          : late_reflections_transparent_mode_gain_;
402   } else {
403     gain_amplitude = gain_for_early_reflections
404                          ? early_reflections_general_gain_
405                          : late_reflections_general_gain_;
406   }
407   return gain_amplitude * gain_amplitude;
408 }
409 
410 }  // namespace webrtc
411