1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/aec3/residual_echo_estimator.h"
12
13 #include <stddef.h>
14
15 #include <algorithm>
16 #include <vector>
17
18 #include "api/array_view.h"
19 #include "modules/audio_processing/aec3/reverb_model.h"
20 #include "rtc_base/checks.h"
21 #include "system_wrappers/include/field_trial.h"
22
23 namespace webrtc {
24 namespace {
25
UseLowEarlyReflectionsTransparentModeGain()26 bool UseLowEarlyReflectionsTransparentModeGain() {
27 return field_trial::IsEnabled(
28 "WebRTC-Aec3UseLowEarlyReflectionsTransparentModeGain");
29 }
30
UseLowLateReflectionsTransparentModeGain()31 bool UseLowLateReflectionsTransparentModeGain() {
32 return field_trial::IsEnabled(
33 "WebRTC-Aec3UseLowLateReflectionsTransparentModeGain");
34 }
35
UseLowEarlyReflectionsDefaultGain()36 bool UseLowEarlyReflectionsDefaultGain() {
37 return field_trial::IsEnabled("WebRTC-Aec3UseLowEarlyReflectionsDefaultGain");
38 }
39
UseLowLateReflectionsDefaultGain()40 bool UseLowLateReflectionsDefaultGain() {
41 return field_trial::IsEnabled("WebRTC-Aec3UseLowLateReflectionsDefaultGain");
42 }
43
ModelReverbInNonlinearMode()44 bool ModelReverbInNonlinearMode() {
45 return !field_trial::IsEnabled("WebRTC-Aec3rNonlinearModeReverbKillSwitch");
46 }
47
48 constexpr float kDefaultTransparentModeGain = 0.01f;
49
GetEarlyReflectionsTransparentModeGain()50 float GetEarlyReflectionsTransparentModeGain() {
51 if (UseLowEarlyReflectionsTransparentModeGain()) {
52 return 0.001f;
53 }
54 return kDefaultTransparentModeGain;
55 }
56
GetLateReflectionsTransparentModeGain()57 float GetLateReflectionsTransparentModeGain() {
58 if (UseLowLateReflectionsTransparentModeGain()) {
59 return 0.001f;
60 }
61
62 return kDefaultTransparentModeGain;
63 }
64
GetEarlyReflectionsDefaultModeGain(const EchoCanceller3Config::EpStrength & config)65 float GetEarlyReflectionsDefaultModeGain(
66 const EchoCanceller3Config::EpStrength& config) {
67 if (UseLowEarlyReflectionsDefaultGain()) {
68 return 0.1f;
69 }
70
71 return config.default_gain;
72 }
73
GetLateReflectionsDefaultModeGain(const EchoCanceller3Config::EpStrength & config)74 float GetLateReflectionsDefaultModeGain(
75 const EchoCanceller3Config::EpStrength& config) {
76 if (UseLowLateReflectionsDefaultGain()) {
77 return 0.1f;
78 }
79 return config.default_gain;
80 }
81
82 // Computes the indexes that will be used for computing spectral power over
83 // the blocks surrounding the delay.
GetRenderIndexesToAnalyze(const SpectrumBuffer & spectrum_buffer,const EchoCanceller3Config::EchoModel & echo_model,int filter_delay_blocks,int * idx_start,int * idx_stop)84 void GetRenderIndexesToAnalyze(
85 const SpectrumBuffer& spectrum_buffer,
86 const EchoCanceller3Config::EchoModel& echo_model,
87 int filter_delay_blocks,
88 int* idx_start,
89 int* idx_stop) {
90 RTC_DCHECK(idx_start);
91 RTC_DCHECK(idx_stop);
92 size_t window_start;
93 size_t window_end;
94 window_start =
95 std::max(0, filter_delay_blocks -
96 static_cast<int>(echo_model.render_pre_window_size));
97 window_end = filter_delay_blocks +
98 static_cast<int>(echo_model.render_post_window_size);
99 *idx_start = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_start);
100 *idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1);
101 }
102
103 // Estimates the residual echo power based on the echo return loss enhancement
104 // (ERLE) and the linear power estimate.
LinearEstimate(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> S2_linear,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> erle,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)105 void LinearEstimate(
106 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
107 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
108 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
109 RTC_DCHECK_EQ(S2_linear.size(), erle.size());
110 RTC_DCHECK_EQ(S2_linear.size(), R2.size());
111
112 const size_t num_capture_channels = R2.size();
113 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
114 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
115 RTC_DCHECK_LT(0.f, erle[ch][k]);
116 R2[ch][k] = S2_linear[ch][k] / erle[ch][k];
117 }
118 }
119 }
120
121 // Estimates the residual echo power based on an uncertainty estimate of the
122 // echo return loss enhancement (ERLE) and the linear power estimate.
LinearEstimate(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> S2_linear,float erle_uncertainty,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)123 void LinearEstimate(
124 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
125 float erle_uncertainty,
126 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
127 RTC_DCHECK_EQ(S2_linear.size(), R2.size());
128
129 const size_t num_capture_channels = R2.size();
130 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
131 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
132 R2[ch][k] = S2_linear[ch][k] * erle_uncertainty;
133 }
134 }
135 }
136
137 // Estimates the residual echo power based on the estimate of the echo path
138 // gain.
NonLinearEstimate(float echo_path_gain,const std::array<float,kFftLengthBy2Plus1> & X2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)139 void NonLinearEstimate(
140 float echo_path_gain,
141 const std::array<float, kFftLengthBy2Plus1>& X2,
142 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
143 const size_t num_capture_channels = R2.size();
144 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
145 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
146 R2[ch][k] = X2[k] * echo_path_gain;
147 }
148 }
149 }
150
151 // Applies a soft noise gate to the echo generating power.
ApplyNoiseGate(const EchoCanceller3Config::EchoModel & config,rtc::ArrayView<float,kFftLengthBy2Plus1> X2)152 void ApplyNoiseGate(const EchoCanceller3Config::EchoModel& config,
153 rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
154 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
155 if (config.noise_gate_power > X2[k]) {
156 X2[k] = std::max(0.f, X2[k] - config.noise_gate_slope *
157 (config.noise_gate_power - X2[k]));
158 }
159 }
160 }
161
162 // Estimates the echo generating signal power as gated maximal power over a
163 // time window.
EchoGeneratingPower(size_t num_render_channels,const SpectrumBuffer & spectrum_buffer,const EchoCanceller3Config::EchoModel & echo_model,int filter_delay_blocks,rtc::ArrayView<float,kFftLengthBy2Plus1> X2)164 void EchoGeneratingPower(size_t num_render_channels,
165 const SpectrumBuffer& spectrum_buffer,
166 const EchoCanceller3Config::EchoModel& echo_model,
167 int filter_delay_blocks,
168 rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
169 int idx_stop;
170 int idx_start;
171 GetRenderIndexesToAnalyze(spectrum_buffer, echo_model, filter_delay_blocks,
172 &idx_start, &idx_stop);
173
174 std::fill(X2.begin(), X2.end(), 0.f);
175 if (num_render_channels == 1) {
176 for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
177 for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
178 X2[j] = std::max(X2[j], spectrum_buffer.buffer[k][/*channel=*/0][j]);
179 }
180 }
181 } else {
182 for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
183 std::array<float, kFftLengthBy2Plus1> render_power;
184 render_power.fill(0.f);
185 for (size_t ch = 0; ch < num_render_channels; ++ch) {
186 const auto& channel_power = spectrum_buffer.buffer[k][ch];
187 for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
188 render_power[j] += channel_power[j];
189 }
190 }
191 for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
192 X2[j] = std::max(X2[j], render_power[j]);
193 }
194 }
195 }
196 }
197
198 } // namespace
199
ResidualEchoEstimator(const EchoCanceller3Config & config,size_t num_render_channels)200 ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config,
201 size_t num_render_channels)
202 : config_(config),
203 num_render_channels_(num_render_channels),
204 early_reflections_transparent_mode_gain_(
205 GetEarlyReflectionsTransparentModeGain()),
206 late_reflections_transparent_mode_gain_(
207 GetLateReflectionsTransparentModeGain()),
208 early_reflections_general_gain_(
209 GetEarlyReflectionsDefaultModeGain(config_.ep_strength)),
210 late_reflections_general_gain_(
211 GetLateReflectionsDefaultModeGain(config_.ep_strength)),
212 model_reverb_in_nonlinear_mode_(ModelReverbInNonlinearMode()) {
213 Reset();
214 }
215
216 ResidualEchoEstimator::~ResidualEchoEstimator() = default;
217
Estimate(const AecState & aec_state,const RenderBuffer & render_buffer,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> S2_linear,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> Y2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)218 void ResidualEchoEstimator::Estimate(
219 const AecState& aec_state,
220 const RenderBuffer& render_buffer,
221 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
222 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
223 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
224 RTC_DCHECK_EQ(R2.size(), Y2.size());
225 RTC_DCHECK_EQ(R2.size(), S2_linear.size());
226
227 const size_t num_capture_channels = R2.size();
228
229 // Estimate the power of the stationary noise in the render signal.
230 UpdateRenderNoisePower(render_buffer);
231
232 // Estimate the residual echo power.
233 if (aec_state.UsableLinearEstimate()) {
234 // When there is saturated echo, assume the same spectral content as is
235 // present in the microphone signal.
236 if (aec_state.SaturatedEcho()) {
237 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
238 std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
239 }
240 } else {
241 absl::optional<float> erle_uncertainty = aec_state.ErleUncertainty();
242 if (erle_uncertainty) {
243 LinearEstimate(S2_linear, *erle_uncertainty, R2);
244 } else {
245 LinearEstimate(S2_linear, aec_state.Erle(), R2);
246 }
247 }
248
249 AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2);
250 } else {
251 const float echo_path_gain =
252 GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true);
253
254 // When there is saturated echo, assume the same spectral content as is
255 // present in the microphone signal.
256 if (aec_state.SaturatedEcho()) {
257 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
258 std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
259 }
260 } else {
261 // Estimate the echo generating signal power.
262 std::array<float, kFftLengthBy2Plus1> X2;
263 EchoGeneratingPower(num_render_channels_,
264 render_buffer.GetSpectrumBuffer(), config_.echo_model,
265 aec_state.MinDirectPathFilterDelay(), X2);
266 if (!aec_state.UseStationarityProperties()) {
267 ApplyNoiseGate(config_.echo_model, X2);
268 }
269
270 // Subtract the stationary noise power to avoid stationary noise causing
271 // excessive echo suppression.
272 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
273 X2[k] -= config_.echo_model.stationary_gate_slope * X2_noise_floor_[k];
274 X2[k] = std::max(0.f, X2[k]);
275 }
276
277 NonLinearEstimate(echo_path_gain, X2, R2);
278 }
279
280 if (model_reverb_in_nonlinear_mode_ && !aec_state.TransparentMode()) {
281 AddReverb(ReverbType::kNonLinear, aec_state, render_buffer, R2);
282 }
283 }
284
285 if (aec_state.UseStationarityProperties()) {
286 // Scale the echo according to echo audibility.
287 std::array<float, kFftLengthBy2Plus1> residual_scaling;
288 aec_state.GetResidualEchoScaling(residual_scaling);
289 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
290 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
291 R2[ch][k] *= residual_scaling[k];
292 }
293 }
294 }
295 }
296
Reset()297 void ResidualEchoEstimator::Reset() {
298 echo_reverb_.Reset();
299 X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold);
300 X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power);
301 }
302
UpdateRenderNoisePower(const RenderBuffer & render_buffer)303 void ResidualEchoEstimator::UpdateRenderNoisePower(
304 const RenderBuffer& render_buffer) {
305 std::array<float, kFftLengthBy2Plus1> render_power_data;
306 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
307 render_buffer.Spectrum(0);
308 rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
309 X2[/*channel=*/0];
310 if (num_render_channels_ > 1) {
311 render_power_data.fill(0.f);
312 for (size_t ch = 0; ch < num_render_channels_; ++ch) {
313 const auto& channel_power = X2[ch];
314 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
315 render_power_data[k] += channel_power[k];
316 }
317 }
318 render_power = render_power_data;
319 }
320
321 // Estimate the stationary noise power in a minimum statistics manner.
322 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
323 // Decrease rapidly.
324 if (render_power[k] < X2_noise_floor_[k]) {
325 X2_noise_floor_[k] = render_power[k];
326 X2_noise_floor_counter_[k] = 0;
327 } else {
328 // Increase in a delayed, leaky manner.
329 if (X2_noise_floor_counter_[k] >=
330 static_cast<int>(config_.echo_model.noise_floor_hold)) {
331 X2_noise_floor_[k] = std::max(X2_noise_floor_[k] * 1.1f,
332 config_.echo_model.min_noise_floor_power);
333 } else {
334 ++X2_noise_floor_counter_[k];
335 }
336 }
337 }
338 }
339
340 // Adds the estimated power of the reverb to the residual echo power.
AddReverb(ReverbType reverb_type,const AecState & aec_state,const RenderBuffer & render_buffer,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> R2)341 void ResidualEchoEstimator::AddReverb(
342 ReverbType reverb_type,
343 const AecState& aec_state,
344 const RenderBuffer& render_buffer,
345 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
346 const size_t num_capture_channels = R2.size();
347
348 // Choose reverb partition based on what type of echo power model is used.
349 const size_t first_reverb_partition =
350 reverb_type == ReverbType::kLinear
351 ? aec_state.FilterLengthBlocks() + 1
352 : aec_state.MinDirectPathFilterDelay() + 1;
353
354 // Compute render power for the reverb.
355 std::array<float, kFftLengthBy2Plus1> render_power_data;
356 rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
357 render_buffer.Spectrum(first_reverb_partition);
358 rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
359 X2[/*channel=*/0];
360 if (num_render_channels_ > 1) {
361 render_power_data.fill(0.f);
362 for (size_t ch = 0; ch < num_render_channels_; ++ch) {
363 const auto& channel_power = X2[ch];
364 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
365 render_power_data[k] += channel_power[k];
366 }
367 }
368 render_power = render_power_data;
369 }
370
371 // Update the reverb estimate.
372 if (reverb_type == ReverbType::kLinear) {
373 echo_reverb_.UpdateReverb(render_power,
374 aec_state.GetReverbFrequencyResponse(),
375 aec_state.ReverbDecay());
376 } else {
377 const float echo_path_gain =
378 GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/false);
379 echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain,
380 aec_state.ReverbDecay());
381 }
382
383 // Add the reverb power.
384 rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
385 echo_reverb_.reverb();
386 for (size_t ch = 0; ch < num_capture_channels; ++ch) {
387 for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
388 R2[ch][k] += reverb_power[k];
389 }
390 }
391 }
392
393 // Chooses the echo path gain to use.
GetEchoPathGain(const AecState & aec_state,bool gain_for_early_reflections) const394 float ResidualEchoEstimator::GetEchoPathGain(
395 const AecState& aec_state,
396 bool gain_for_early_reflections) const {
397 float gain_amplitude;
398 if (aec_state.TransparentMode()) {
399 gain_amplitude = gain_for_early_reflections
400 ? early_reflections_transparent_mode_gain_
401 : late_reflections_transparent_mode_gain_;
402 } else {
403 gain_amplitude = gain_for_early_reflections
404 ? early_reflections_general_gain_
405 : late_reflections_general_gain_;
406 }
407 return gain_amplitude * gain_amplitude;
408 }
409
410 } // namespace webrtc
411