/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include "modules/audio_processing/aec3/erle_estimator.h"
12 
13 #include <cmath>
14 
15 #include "api/array_view.h"
16 #include "modules/audio_processing/aec3/render_delay_buffer.h"
17 #include "modules/audio_processing/aec3/spectrum_buffer.h"
18 #include "rtc_base/random.h"
19 #include "rtc_base/strings/string_builder.h"
20 #include "test/gtest.h"
21 
22 namespace webrtc {
23 
24 namespace {
25 constexpr int kLowFrequencyLimit = kFftLengthBy2 / 2;
26 constexpr float kTrueErle = 10.f;
27 constexpr float kTrueErleOnsets = 1.0f;
28 constexpr float kEchoPathGain = 3.f;
29 
VerifyErleBands(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> erle,float reference_lf,float reference_hf)30 void VerifyErleBands(
31     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
32     float reference_lf,
33     float reference_hf) {
34   for (size_t ch = 0; ch < erle.size(); ++ch) {
35     std::for_each(
36         erle[ch].begin(), erle[ch].begin() + kLowFrequencyLimit,
37         [reference_lf](float a) { EXPECT_NEAR(reference_lf, a, 0.001); });
38     std::for_each(
39         erle[ch].begin() + kLowFrequencyLimit, erle[ch].end(),
40         [reference_hf](float a) { EXPECT_NEAR(reference_hf, a, 0.001); });
41   }
42 }
43 
VerifyErle(rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> erle,float erle_time_domain,float reference_lf,float reference_hf)44 void VerifyErle(
45     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
46     float erle_time_domain,
47     float reference_lf,
48     float reference_hf) {
49   VerifyErleBands(erle, reference_lf, reference_hf);
50   EXPECT_NEAR(reference_lf, erle_time_domain, 0.5);
51 }
52 
FormFarendTimeFrame(std::vector<std::vector<std::vector<float>>> * x)53 void FormFarendTimeFrame(std::vector<std::vector<std::vector<float>>>* x) {
54   const std::array<float, kBlockSize> frame = {
55       7459.88, 17209.6, 17383,   20768.9, 16816.7, 18386.3, 4492.83, 9675.85,
56       6665.52, 14808.6, 9342.3,  7483.28, 19261.7, 4145.98, 1622.18, 13475.2,
57       7166.32, 6856.61, 21937,   7263.14, 9569.07, 14919,   8413.32, 7551.89,
58       7848.65, 6011.27, 13080.6, 15865.2, 12656,   17459.6, 4263.93, 4503.03,
59       9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6,
60       11405,   15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8,
61       1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4,
62       12416.2, 16434,   2454.69, 9840.8,  6867.23, 1615.75, 6059.9,  8394.19};
63   for (size_t band = 0; band < x->size(); ++band) {
64     for (size_t channel = 0; channel < (*x)[band].size(); ++channel) {
65       RTC_DCHECK_GE((*x)[band][channel].size(), frame.size());
66       std::copy(frame.begin(), frame.end(), (*x)[band][channel].begin());
67     }
68   }
69 }
70 
FormFarendFrame(const RenderBuffer & render_buffer,float erle,std::array<float,kFftLengthBy2Plus1> * X2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> E2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> Y2)71 void FormFarendFrame(const RenderBuffer& render_buffer,
72                      float erle,
73                      std::array<float, kFftLengthBy2Plus1>* X2,
74                      rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2,
75                      rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2) {
76   const auto& spectrum_buffer = render_buffer.GetSpectrumBuffer();
77   const int num_render_channels = spectrum_buffer.buffer[0].size();
78   const int num_capture_channels = Y2.size();
79 
80   X2->fill(0.f);
81   for (int ch = 0; ch < num_render_channels; ++ch) {
82     for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
83       (*X2)[k] += spectrum_buffer.buffer[spectrum_buffer.write][ch][k] /
84                   num_render_channels;
85     }
86   }
87 
88   for (int ch = 0; ch < num_capture_channels; ++ch) {
89     std::transform(X2->begin(), X2->end(), Y2[ch].begin(),
90                    [](float a) { return a * kEchoPathGain * kEchoPathGain; });
91     std::transform(Y2[ch].begin(), Y2[ch].end(), E2[ch].begin(),
92                    [erle](float a) { return a / erle; });
93   }
94 }
95 
FormNearendFrame(std::vector<std::vector<std::vector<float>>> * x,std::array<float,kFftLengthBy2Plus1> * X2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> E2,rtc::ArrayView<std::array<float,kFftLengthBy2Plus1>> Y2)96 void FormNearendFrame(
97     std::vector<std::vector<std::vector<float>>>* x,
98     std::array<float, kFftLengthBy2Plus1>* X2,
99     rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2,
100     rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2) {
101   for (size_t band = 0; band < x->size(); ++band) {
102     for (size_t ch = 0; ch < (*x)[band].size(); ++ch) {
103       std::fill((*x)[band][ch].begin(), (*x)[band][ch].end(), 0.f);
104     }
105   }
106 
107   X2->fill(0.f);
108   for (size_t ch = 0; ch < Y2.size(); ++ch) {
109     Y2[ch].fill(500.f * 1000.f * 1000.f);
110     E2[ch].fill(Y2[ch][0]);
111   }
112 }
113 
GetFilterFreq(size_t delay_headroom_samples,rtc::ArrayView<std::vector<std::array<float,kFftLengthBy2Plus1>>> filter_frequency_response)114 void GetFilterFreq(
115     size_t delay_headroom_samples,
116     rtc::ArrayView<std::vector<std::array<float, kFftLengthBy2Plus1>>>
117         filter_frequency_response) {
118   const size_t delay_headroom_blocks = delay_headroom_samples / kBlockSize;
119   for (size_t ch = 0; ch < filter_frequency_response[0].size(); ++ch) {
120     for (auto& block_freq_resp : filter_frequency_response) {
121       block_freq_resp[ch].fill(0.f);
122     }
123 
124     for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
125       filter_frequency_response[delay_headroom_blocks][ch][k] = kEchoPathGain;
126     }
127   }
128 }
129 
130 }  // namespace
131 
132 class ErleEstimatorMultiChannel
133     : public ::testing::Test,
134       public ::testing::WithParamInterface<std::tuple<size_t, size_t>> {};
135 
136 INSTANTIATE_TEST_SUITE_P(MultiChannel,
137                          ErleEstimatorMultiChannel,
138                          ::testing::Combine(::testing::Values(1, 2, 4, 8),
139                                             ::testing::Values(1, 2, 8)));
140 
TEST_P(ErleEstimatorMultiChannel,VerifyErleIncreaseAndHold)141 TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) {
142   const size_t num_render_channels = std::get<0>(GetParam());
143   const size_t num_capture_channels = std::get<1>(GetParam());
144   constexpr int kSampleRateHz = 48000;
145   constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
146 
147   std::array<float, kFftLengthBy2Plus1> X2;
148   std::vector<std::array<float, kFftLengthBy2Plus1>> E2(num_capture_channels);
149   std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
150   std::vector<bool> converged_filters(num_capture_channels, true);
151 
152   EchoCanceller3Config config;
153   config.erle.onset_detection = true;
154 
155   std::vector<std::vector<std::vector<float>>> x(
156       kNumBands, std::vector<std::vector<float>>(
157                      num_render_channels, std::vector<float>(kBlockSize, 0.f)));
158   std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
159   filter_frequency_response(
160       config.filter.refined.length_blocks,
161       std::vector<std::array<float, kFftLengthBy2Plus1>>(num_capture_channels));
162   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
163       RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
164 
165   GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response);
166 
167   ErleEstimator estimator(0, config, num_capture_channels);
168 
169   FormFarendTimeFrame(&x);
170   render_delay_buffer->Insert(x);
171   render_delay_buffer->PrepareCaptureProcessing();
172   // Verifies that the ERLE estimate is properly increased to higher values.
173   FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2,
174                   Y2);
175   for (size_t k = 0; k < 200; ++k) {
176     render_delay_buffer->Insert(x);
177     render_delay_buffer->PrepareCaptureProcessing();
178     estimator.Update(*render_delay_buffer->GetRenderBuffer(),
179                      filter_frequency_response, X2, Y2, E2, converged_filters);
180   }
181   VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()),
182              config.erle.max_l, config.erle.max_h);
183 
184   FormNearendFrame(&x, &X2, E2, Y2);
185   // Verifies that the ERLE is not immediately decreased during nearend
186   // activity.
187   for (size_t k = 0; k < 50; ++k) {
188     render_delay_buffer->Insert(x);
189     render_delay_buffer->PrepareCaptureProcessing();
190     estimator.Update(*render_delay_buffer->GetRenderBuffer(),
191                      filter_frequency_response, X2, Y2, E2, converged_filters);
192   }
193   VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()),
194              config.erle.max_l, config.erle.max_h);
195 }
196 
TEST_P(ErleEstimatorMultiChannel,VerifyErleTrackingOnOnsets)197 TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) {
198   const size_t num_render_channels = std::get<0>(GetParam());
199   const size_t num_capture_channels = std::get<1>(GetParam());
200   constexpr int kSampleRateHz = 48000;
201   constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
202 
203   std::array<float, kFftLengthBy2Plus1> X2;
204   std::vector<std::array<float, kFftLengthBy2Plus1>> E2(num_capture_channels);
205   std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(num_capture_channels);
206   std::vector<bool> converged_filters(num_capture_channels, true);
207   EchoCanceller3Config config;
208   config.erle.onset_detection = true;
209   std::vector<std::vector<std::vector<float>>> x(
210       kNumBands, std::vector<std::vector<float>>(
211                      num_render_channels, std::vector<float>(kBlockSize, 0.f)));
212   std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
213   filter_frequency_response(
214       config.filter.refined.length_blocks,
215       std::vector<std::array<float, kFftLengthBy2Plus1>>(num_capture_channels));
216   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
217       RenderDelayBuffer::Create(config, kSampleRateHz, num_render_channels));
218 
219   GetFilterFreq(config.delay.delay_headroom_samples, filter_frequency_response);
220 
221   ErleEstimator estimator(/*startup_phase_length_blocks=*/0, config,
222                           num_capture_channels);
223 
224   FormFarendTimeFrame(&x);
225   render_delay_buffer->Insert(x);
226   render_delay_buffer->PrepareCaptureProcessing();
227 
228   for (size_t burst = 0; burst < 20; ++burst) {
229     FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErleOnsets,
230                     &X2, E2, Y2);
231     for (size_t k = 0; k < 10; ++k) {
232       render_delay_buffer->Insert(x);
233       render_delay_buffer->PrepareCaptureProcessing();
234       estimator.Update(*render_delay_buffer->GetRenderBuffer(),
235                        filter_frequency_response, X2, Y2, E2,
236                        converged_filters);
237     }
238     FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), kTrueErle, &X2, E2,
239                     Y2);
240     for (size_t k = 0; k < 200; ++k) {
241       render_delay_buffer->Insert(x);
242       render_delay_buffer->PrepareCaptureProcessing();
243       estimator.Update(*render_delay_buffer->GetRenderBuffer(),
244                        filter_frequency_response, X2, Y2, E2,
245                        converged_filters);
246     }
247     FormNearendFrame(&x, &X2, E2, Y2);
248     for (size_t k = 0; k < 300; ++k) {
249       render_delay_buffer->Insert(x);
250       render_delay_buffer->PrepareCaptureProcessing();
251       estimator.Update(*render_delay_buffer->GetRenderBuffer(),
252                        filter_frequency_response, X2, Y2, E2,
253                        converged_filters);
254     }
255   }
256   VerifyErleBands(estimator.ErleOnsets(), config.erle.min, config.erle.min);
257   FormNearendFrame(&x, &X2, E2, Y2);
258   for (size_t k = 0; k < 1000; k++) {
259     estimator.Update(*render_delay_buffer->GetRenderBuffer(),
260                      filter_frequency_response, X2, Y2, E2, converged_filters);
261   }
262   // Verifies that during ne activity, Erle converges to the Erle for
263   // onsets.
264   VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()),
265              config.erle.min, config.erle.min);
266 }
267 
268 }  // namespace webrtc
269