1 /*
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/residual_echo_detector.h"
12
13 #include <algorithm>
14 #include <numeric>
15
16 #include "absl/types/optional.h"
17 #include "modules/audio_processing/logging/apm_data_dumper.h"
18 #include "rtc_base/checks.h"
19 #include "rtc_base/logging.h"
20 #include "system_wrappers/include/metrics.h"
21
22 namespace {
23
Power(rtc::ArrayView<const float> input)24 float Power(rtc::ArrayView<const float> input) {
25 if (input.empty()) {
26 return 0.f;
27 }
28 return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) /
29 input.size();
30 }
31
// Length of the circular render history (power, mean, standard deviation) and
// the number of candidate delays for which a covariance is tracked.
constexpr size_t kLookbackFrames{650};
// Capacity of the buffer holding render power values that have not yet been
// matched with a capture frame.
// TODO(ivoc): Verify the size of this buffer.
constexpr size_t kRenderBufferSize{30};
// Smoothing factor of the recursive update applied to the reliability value.
constexpr float kAlpha{0.001f};
// 10 seconds of data, updated every 10 ms.
constexpr size_t kAggregationBufferSize{10 * 100};
38
39 } // namespace
40
41 namespace webrtc {
42
43 std::atomic<int> ResidualEchoDetector::instance_count_(0);
44
ResidualEchoDetector()45 ResidualEchoDetector::ResidualEchoDetector()
46 : data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
47 render_buffer_(kRenderBufferSize),
48 render_power_(kLookbackFrames),
49 render_power_mean_(kLookbackFrames),
50 render_power_std_dev_(kLookbackFrames),
51 covariances_(kLookbackFrames),
52 recent_likelihood_max_(kAggregationBufferSize) {}
53
// Defaulted out of line: no custom teardown is performed here, each member is
// destroyed by its own destructor.
ResidualEchoDetector::~ResidualEchoDetector() = default;
55
AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio)56 void ResidualEchoDetector::AnalyzeRenderAudio(
57 rtc::ArrayView<const float> render_audio) {
58 // Dump debug data assuming 48 kHz sample rate (if this assumption is not
59 // valid the dumped audio will need to be converted offline accordingly).
60 data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(),
61 48000, 1);
62
63 if (render_buffer_.Size() == 0) {
64 frames_since_zero_buffer_size_ = 0;
65 } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) {
66 // This can happen in a few cases: at the start of a call, due to a glitch
67 // or due to clock drift. The excess capture value will be ignored.
68 // TODO(ivoc): Include how often this happens in APM stats.
69 render_buffer_.Pop();
70 frames_since_zero_buffer_size_ = 0;
71 }
72 ++frames_since_zero_buffer_size_;
73 float power = Power(render_audio);
74 render_buffer_.Push(power);
75 }
76
AnalyzeCaptureAudio(rtc::ArrayView<const float> capture_audio)77 void ResidualEchoDetector::AnalyzeCaptureAudio(
78 rtc::ArrayView<const float> capture_audio) {
79 // Dump debug data assuming 48 kHz sample rate (if this assumption is not
80 // valid the dumped audio will need to be converted offline accordingly).
81 data_dumper_->DumpWav("ed_capture", capture_audio.size(),
82 capture_audio.data(), 48000, 1);
83
84 if (first_process_call_) {
85 // On the first process call (so the start of a call), we must flush the
86 // render buffer, otherwise the render data will be delayed.
87 render_buffer_.Clear();
88 first_process_call_ = false;
89 }
90
91 // Get the next render value.
92 const absl::optional<float> buffered_render_power = render_buffer_.Pop();
93 if (!buffered_render_power) {
94 // This can happen in a few cases: at the start of a call, due to a glitch
95 // or due to clock drift. The excess capture value will be ignored.
96 // TODO(ivoc): Include how often this happens in APM stats.
97 return;
98 }
99 // Update the render statistics, and store the statistics in circular buffers.
100 render_statistics_.Update(*buffered_render_power);
101 RTC_DCHECK_LT(next_insertion_index_, kLookbackFrames);
102 render_power_[next_insertion_index_] = *buffered_render_power;
103 render_power_mean_[next_insertion_index_] = render_statistics_.mean();
104 render_power_std_dev_[next_insertion_index_] =
105 render_statistics_.std_deviation();
106
107 // Get the next capture value, update capture statistics and add the relevant
108 // values to the buffers.
109 const float capture_power = Power(capture_audio);
110 capture_statistics_.Update(capture_power);
111 const float capture_mean = capture_statistics_.mean();
112 const float capture_std_deviation = capture_statistics_.std_deviation();
113
114 // Update the covariance values and determine the new echo likelihood.
115 echo_likelihood_ = 0.f;
116 size_t read_index = next_insertion_index_;
117
118 int best_delay = -1;
119 for (size_t delay = 0; delay < covariances_.size(); ++delay) {
120 RTC_DCHECK_LT(read_index, render_power_.size());
121 covariances_[delay].Update(capture_power, capture_mean,
122 capture_std_deviation, render_power_[read_index],
123 render_power_mean_[read_index],
124 render_power_std_dev_[read_index]);
125 read_index = read_index > 0 ? read_index - 1 : kLookbackFrames - 1;
126
127 if (covariances_[delay].normalized_cross_correlation() > echo_likelihood_) {
128 echo_likelihood_ = covariances_[delay].normalized_cross_correlation();
129 best_delay = static_cast<int>(delay);
130 }
131 }
132 // This is a temporary log message to help find the underlying cause for echo
133 // likelihoods > 1.0.
134 // TODO(ivoc): Remove once the issue is resolved.
135 if (echo_likelihood_ > 1.1f) {
136 // Make sure we don't spam the log.
137 if (log_counter_ < 5 && best_delay != -1) {
138 size_t read_index = kLookbackFrames + next_insertion_index_ - best_delay;
139 if (read_index >= kLookbackFrames) {
140 read_index -= kLookbackFrames;
141 }
142 RTC_DCHECK_LT(read_index, render_power_.size());
143 RTC_LOG_F(LS_ERROR) << "Echo detector internal state: {"
144 "Echo likelihood: "
145 << echo_likelihood_ << ", Best Delay: " << best_delay
146 << ", Covariance: "
147 << covariances_[best_delay].covariance()
148 << ", Last capture power: " << capture_power
149 << ", Capture mean: " << capture_mean
150 << ", Capture_standard deviation: "
151 << capture_std_deviation << ", Last render power: "
152 << render_power_[read_index]
153 << ", Render mean: " << render_power_mean_[read_index]
154 << ", Render standard deviation: "
155 << render_power_std_dev_[read_index]
156 << ", Reliability: " << reliability_ << "}";
157 log_counter_++;
158 }
159 }
160 RTC_DCHECK_LT(echo_likelihood_, 1.1f);
161
162 reliability_ = (1.0f - kAlpha) * reliability_ + kAlpha * 1.0f;
163 echo_likelihood_ *= reliability_;
164 // This is a temporary fix to prevent echo likelihood values > 1.0.
165 // TODO(ivoc): Find the root cause of this issue and fix it.
166 echo_likelihood_ = std::min(echo_likelihood_, 1.0f);
167 int echo_percentage = static_cast<int>(echo_likelihood_ * 100);
168 RTC_HISTOGRAM_COUNTS("WebRTC.Audio.ResidualEchoDetector.EchoLikelihood",
169 echo_percentage, 0, 100, 100 /* number of bins */);
170
171 // Update the buffer of recent likelihood values.
172 recent_likelihood_max_.Update(echo_likelihood_);
173
174 // Update the next insertion index.
175 next_insertion_index_ = next_insertion_index_ < (kLookbackFrames - 1)
176 ? next_insertion_index_ + 1
177 : 0;
178 }
179
Initialize(int,int,int,int)180 void ResidualEchoDetector::Initialize(int /*capture_sample_rate_hz*/,
181 int /*num_capture_channels*/,
182 int /*render_sample_rate_hz*/,
183 int /*num_render_channels*/) {
184 render_buffer_.Clear();
185 std::fill(render_power_.begin(), render_power_.end(), 0.f);
186 std::fill(render_power_mean_.begin(), render_power_mean_.end(), 0.f);
187 std::fill(render_power_std_dev_.begin(), render_power_std_dev_.end(), 0.f);
188 render_statistics_.Clear();
189 capture_statistics_.Clear();
190 recent_likelihood_max_.Clear();
191 for (auto& cov : covariances_) {
192 cov.Clear();
193 }
194 echo_likelihood_ = 0.f;
195 next_insertion_index_ = 0;
196 reliability_ = 0.f;
197 }
198
GetMetrics() const199 EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const {
200 EchoDetector::Metrics metrics;
201 metrics.echo_likelihood = echo_likelihood_;
202 metrics.echo_likelihood_recent_max = recent_likelihood_max_.max();
203 return metrics;
204 }
205 } // namespace webrtc
206