1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #define _USE_MATH_DEFINES
12
13 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
14
15 #include <algorithm>
16 #include <cmath>
17 #include <numeric>
18 #include <vector>
19
20 #include "webrtc/base/arraysize.h"
21 #include "webrtc/common_audio/window_generator.h"
22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"
23
24 namespace webrtc {
25 namespace {
26
27 // Alpha for the Kaiser Bessel Derived window.
28 const float kKbdAlpha = 1.5f;
29
30 const float kSpeedOfSoundMeterSeconds = 343;
31
32 // The minimum separation in radians between the target direction and an
33 // interferer scenario.
34 const float kMinAwayRadians = 0.2f;
35
36 // The separation between the target direction and the closest interferer
37 // scenario is proportional to this constant.
38 const float kAwaySlope = 0.008f;
39
40 // When calculating the interference covariance matrix, this is the weight for
41 // the weighted average between the uniform covariance matrix and the angled
42 // covariance matrix.
43 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
44 const float kBalance = 0.95f;
45
46 // Alpha coefficients for mask smoothing.
47 const float kMaskTimeSmoothAlpha = 0.2f;
48 const float kMaskFrequencySmoothAlpha = 0.6f;
49
50 // The average mask is computed from masks in this mid-frequency range. If these
51 // ranges are changed |kMaskQuantile| might need to be adjusted.
52 const int kLowMeanStartHz = 200;
53 const int kLowMeanEndHz = 400;
54
55 // Range limiter for subtractive terms in the nominator and denominator of the
56 // postfilter expression. It handles the scenario mismatch between the true and
57 // model sources (target and interference).
58 const float kCutOffConstant = 0.9999f;
59
60 // Quantile of mask values which is used to estimate target presence.
61 const float kMaskQuantile = 0.7f;
62 // Mask threshold over which the data is considered signal and not interference.
63 // It has to be updated every time the postfilter calculation is changed
64 // significantly.
65 // TODO(aluebs): Write a tool to tune the target threshold automatically based
66 // on files annotated with target and interference ground truth.
67 const float kMaskTargetThreshold = 0.01f;
68 // Time in seconds after which the data is considered interference if the mask
69 // does not pass |kMaskTargetThreshold|.
70 const float kHoldTargetSeconds = 0.25f;
71
72 // To compensate for the attenuation this algorithm introduces to the target
73 // signal. It was estimated empirically from a low-noise low-reverberation
74 // recording from broadside.
75 const float kCompensationGain = 2.f;
76
77 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
78 // used; to accomplish this, we compute both multiplications in the same loop.
79 // The returned norm is clamped to be non-negative.
Norm(const ComplexMatrix<float> & mat,const ComplexMatrix<float> & norm_mat)80 float Norm(const ComplexMatrix<float>& mat,
81 const ComplexMatrix<float>& norm_mat) {
82 RTC_CHECK_EQ(1u, norm_mat.num_rows());
83 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows());
84 RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns());
85
86 complex<float> first_product = complex<float>(0.f, 0.f);
87 complex<float> second_product = complex<float>(0.f, 0.f);
88
89 const complex<float>* const* mat_els = mat.elements();
90 const complex<float>* const* norm_mat_els = norm_mat.elements();
91
92 for (size_t i = 0; i < norm_mat.num_columns(); ++i) {
93 for (size_t j = 0; j < norm_mat.num_columns(); ++j) {
94 first_product += conj(norm_mat_els[0][j]) * mat_els[j][i];
95 }
96 second_product += first_product * norm_mat_els[0][i];
97 first_product = 0.f;
98 }
99 return std::max(second_product.real(), 0.f);
100 }
101
102 // Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|.
ConjugateDotProduct(const ComplexMatrix<float> & lhs,const ComplexMatrix<float> & rhs)103 complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs,
104 const ComplexMatrix<float>& rhs) {
105 RTC_CHECK_EQ(1u, lhs.num_rows());
106 RTC_CHECK_EQ(1u, rhs.num_rows());
107 RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns());
108
109 const complex<float>* const* lhs_elements = lhs.elements();
110 const complex<float>* const* rhs_elements = rhs.elements();
111
112 complex<float> result = complex<float>(0.f, 0.f);
113 for (size_t i = 0; i < lhs.num_columns(); ++i) {
114 result += conj(lhs_elements[0][i]) * rhs_elements[0][i];
115 }
116
117 return result;
118 }
119
120 // Works for positive numbers only.
Round(float x)121 size_t Round(float x) {
122 return static_cast<size_t>(std::floor(x + 0.5f));
123 }
124
125 // Calculates the sum of absolute values of a complex matrix.
SumAbs(const ComplexMatrix<float> & mat)126 float SumAbs(const ComplexMatrix<float>& mat) {
127 float sum_abs = 0.f;
128 const complex<float>* const* mat_els = mat.elements();
129 for (size_t i = 0; i < mat.num_rows(); ++i) {
130 for (size_t j = 0; j < mat.num_columns(); ++j) {
131 sum_abs += std::abs(mat_els[i][j]);
132 }
133 }
134 return sum_abs;
135 }
136
137 // Calculates the sum of squares of a complex matrix.
SumSquares(const ComplexMatrix<float> & mat)138 float SumSquares(const ComplexMatrix<float>& mat) {
139 float sum_squares = 0.f;
140 const complex<float>* const* mat_els = mat.elements();
141 for (size_t i = 0; i < mat.num_rows(); ++i) {
142 for (size_t j = 0; j < mat.num_columns(); ++j) {
143 float abs_value = std::abs(mat_els[i][j]);
144 sum_squares += abs_value * abs_value;
145 }
146 }
147 return sum_squares;
148 }
149
150 // Does |out| = |in|.' * conj(|in|) for row vector |in|.
TransposedConjugatedProduct(const ComplexMatrix<float> & in,ComplexMatrix<float> * out)151 void TransposedConjugatedProduct(const ComplexMatrix<float>& in,
152 ComplexMatrix<float>* out) {
153 RTC_CHECK_EQ(1u, in.num_rows());
154 RTC_CHECK_EQ(out->num_rows(), in.num_columns());
155 RTC_CHECK_EQ(out->num_columns(), in.num_columns());
156 const complex<float>* in_elements = in.elements()[0];
157 complex<float>* const* out_elements = out->elements();
158 for (size_t i = 0; i < out->num_rows(); ++i) {
159 for (size_t j = 0; j < out->num_columns(); ++j) {
160 out_elements[i][j] = in_elements[i] * conj(in_elements[j]);
161 }
162 }
163 }
164
GetCenteredArray(std::vector<Point> array_geometry)165 std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
166 for (size_t dim = 0; dim < 3; ++dim) {
167 float center = 0.f;
168 for (size_t i = 0; i < array_geometry.size(); ++i) {
169 center += array_geometry[i].c[dim];
170 }
171 center /= array_geometry.size();
172 for (size_t i = 0; i < array_geometry.size(); ++i) {
173 array_geometry[i].c[dim] -= center;
174 }
175 }
176 return array_geometry;
177 }
178
179 } // namespace
180
181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
182
183 // static
184 const size_t NonlinearBeamformer::kNumFreqBins;
185
NonlinearBeamformer(const std::vector<Point> & array_geometry,SphericalPointf target_direction)186 NonlinearBeamformer::NonlinearBeamformer(
187 const std::vector<Point>& array_geometry,
188 SphericalPointf target_direction)
189 : num_input_channels_(array_geometry.size()),
190 array_geometry_(GetCenteredArray(array_geometry)),
191 array_normal_(GetArrayNormalIfExists(array_geometry)),
192 min_mic_spacing_(GetMinimumSpacing(array_geometry)),
193 target_angle_radians_(target_direction.azimuth()),
194 away_radians_(std::min(
195 static_cast<float>(M_PI),
196 std::max(kMinAwayRadians,
197 kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {
198 WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
199 }
200
Initialize(int chunk_size_ms,int sample_rate_hz)201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
202 chunk_length_ =
203 static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
204 sample_rate_hz_ = sample_rate_hz;
205
206 high_pass_postfilter_mask_ = 1.f;
207 is_target_present_ = false;
208 hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
209 interference_blocks_count_ = hold_target_blocks_;
210
211 lapped_transform_.reset(new LappedTransform(num_input_channels_,
212 1,
213 chunk_length_,
214 window_,
215 kFftSize,
216 kFftSize / 2,
217 this));
218 for (size_t i = 0; i < kNumFreqBins; ++i) {
219 time_smooth_mask_[i] = 1.f;
220 final_mask_[i] = 1.f;
221 float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
222 wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
223 }
224
225 InitLowFrequencyCorrectionRanges();
226 InitDiffuseCovMats();
227 AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
228 }
229
230 // These bin indexes determine the regions over which a mean is taken. This is
231 // applied as a constant value over the adjacent end "frequency correction"
232 // regions.
233 //
234 // low_mean_start_bin_ high_mean_start_bin_
235 // v v constant
236 // |----------------|--------|----------------|-------|----------------|
237 // constant ^ ^
238 // low_mean_end_bin_ high_mean_end_bin_
239 //
InitLowFrequencyCorrectionRanges()240 void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() {
241 low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
242 low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
243
244 RTC_DCHECK_GT(low_mean_start_bin_, 0U);
245 RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
246 }
247
InitHighFrequencyCorrectionRanges()248 void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {
249 const float kAliasingFreqHz =
250 kSpeedOfSoundMeterSeconds /
251 (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));
252 const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,
253 sample_rate_hz_ / 2.f);
254 const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,
255 sample_rate_hz_ / 2.f);
256 high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
257 high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
258
259 RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
260 RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
261 RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
262 }
263
InitInterfAngles()264 void NonlinearBeamformer::InitInterfAngles() {
265 interf_angles_radians_.clear();
266 const Point target_direction = AzimuthToPoint(target_angle_radians_);
267 const Point clockwise_interf_direction =
268 AzimuthToPoint(target_angle_radians_ - away_radians_);
269 if (!array_normal_ ||
270 DotProduct(*array_normal_, target_direction) *
271 DotProduct(*array_normal_, clockwise_interf_direction) >=
272 0.f) {
273 // The target and clockwise interferer are in the same half-plane defined
274 // by the array.
275 interf_angles_radians_.push_back(target_angle_radians_ - away_radians_);
276 } else {
277 // Otherwise, the interferer will begin reflecting back at the target.
278 // Instead rotate it away 180 degrees.
279 interf_angles_radians_.push_back(target_angle_radians_ - away_radians_ +
280 M_PI);
281 }
282 const Point counterclock_interf_direction =
283 AzimuthToPoint(target_angle_radians_ + away_radians_);
284 if (!array_normal_ ||
285 DotProduct(*array_normal_, target_direction) *
286 DotProduct(*array_normal_, counterclock_interf_direction) >=
287 0.f) {
288 // The target and counter-clockwise interferer are in the same half-plane
289 // defined by the array.
290 interf_angles_radians_.push_back(target_angle_radians_ + away_radians_);
291 } else {
292 // Otherwise, the interferer will begin reflecting back at the target.
293 // Instead rotate it away 180 degrees.
294 interf_angles_radians_.push_back(target_angle_radians_ + away_radians_ -
295 M_PI);
296 }
297 }
298
InitDelaySumMasks()299 void NonlinearBeamformer::InitDelaySumMasks() {
300 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
301 delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
302 CovarianceMatrixGenerator::PhaseAlignmentMasks(
303 f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,
304 array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);
305
306 complex_f norm_factor = sqrt(
307 ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
308 delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
309 normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
310 normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
311 normalized_delay_sum_masks_[f_ix]));
312 }
313 }
314
InitTargetCovMats()315 void NonlinearBeamformer::InitTargetCovMats() {
316 for (size_t i = 0; i < kNumFreqBins; ++i) {
317 target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
318 TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
319 }
320 }
321
InitDiffuseCovMats()322 void NonlinearBeamformer::InitDiffuseCovMats() {
323 for (size_t i = 0; i < kNumFreqBins; ++i) {
324 uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_);
325 CovarianceMatrixGenerator::UniformCovarianceMatrix(
326 wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);
327 complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0];
328 uniform_cov_mat_[i].Scale(1.f / normalization_factor);
329 uniform_cov_mat_[i].Scale(1 - kBalance);
330 }
331 }
332
InitInterfCovMats()333 void NonlinearBeamformer::InitInterfCovMats() {
334 for (size_t i = 0; i < kNumFreqBins; ++i) {
335 interf_cov_mats_[i].clear();
336 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
337 interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,
338 num_input_channels_));
339 ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
340 CovarianceMatrixGenerator::AngledCovarianceMatrix(
341 kSpeedOfSoundMeterSeconds,
342 interf_angles_radians_[j],
343 i,
344 kFftSize,
345 kNumFreqBins,
346 sample_rate_hz_,
347 array_geometry_,
348 &angled_cov_mat);
349 // Normalize matrices before averaging them.
350 complex_f normalization_factor = angled_cov_mat.elements()[0][0];
351 angled_cov_mat.Scale(1.f / normalization_factor);
352 // Weighted average of matrices.
353 angled_cov_mat.Scale(kBalance);
354 interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat);
355 }
356 }
357 }
358
NormalizeCovMats()359 void NonlinearBeamformer::NormalizeCovMats() {
360 for (size_t i = 0; i < kNumFreqBins; ++i) {
361 rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
362 rpsiws_[i].clear();
363 for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
364 rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
365 }
366 }
367 }
368
ProcessChunk(const ChannelBuffer<float> & input,ChannelBuffer<float> * output)369 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
370 ChannelBuffer<float>* output) {
371 RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
372 RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
373
374 float old_high_pass_mask = high_pass_postfilter_mask_;
375 lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
376 // Ramp up/down for smoothing. 1 mask per 10ms results in audible
377 // discontinuities.
378 const float ramp_increment =
379 (high_pass_postfilter_mask_ - old_high_pass_mask) /
380 input.num_frames_per_band();
381 // Apply the smoothed high-pass mask to the first channel of each band.
382 // This can be done because the effect of the linear beamformer is negligible
383 // compared to the post-filter.
384 for (size_t i = 1; i < input.num_bands(); ++i) {
385 float smoothed_mask = old_high_pass_mask;
386 for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
387 smoothed_mask += ramp_increment;
388 output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
389 }
390 }
391 }
392
AimAt(const SphericalPointf & target_direction)393 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
394 target_angle_radians_ = target_direction.azimuth();
395 InitHighFrequencyCorrectionRanges();
396 InitInterfAngles();
397 InitDelaySumMasks();
398 InitTargetCovMats();
399 InitInterfCovMats();
400 NormalizeCovMats();
401 }
402
IsInBeam(const SphericalPointf & spherical_point)403 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
404 // If more than half-beamwidth degrees away from the beam's center,
405 // you are out of the beam.
406 return fabs(spherical_point.azimuth() - target_angle_radians_) <
407 kHalfBeamWidthRadians;
408 }
409
ProcessAudioBlock(const complex_f * const * input,size_t num_input_channels,size_t num_freq_bins,size_t num_output_channels,complex_f * const * output)410 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
411 size_t num_input_channels,
412 size_t num_freq_bins,
413 size_t num_output_channels,
414 complex_f* const* output) {
415 RTC_CHECK_EQ(kNumFreqBins, num_freq_bins);
416 RTC_CHECK_EQ(num_input_channels_, num_input_channels);
417 RTC_CHECK_EQ(1u, num_output_channels);
418
419 // Calculating the post-filter masks. Note that we need two for each
420 // frequency bin to account for the positive and negative interferer
421 // angle.
422 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
423 eig_m_.CopyFromColumn(input, i, num_input_channels_);
424 float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
425 if (eig_m_norm_factor != 0.f) {
426 eig_m_.Scale(1.f / eig_m_norm_factor);
427 }
428
429 float rxim = Norm(target_cov_mats_[i], eig_m_);
430 float ratio_rxiw_rxim = 0.f;
431 if (rxim > 0.f) {
432 ratio_rxiw_rxim = rxiws_[i] / rxim;
433 }
434
435 complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));
436 rmw *= rmw;
437 float rmw_r = rmw.real();
438
439 new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0],
440 rpsiws_[i][0],
441 ratio_rxiw_rxim,
442 rmw_r);
443 for (size_t j = 1; j < interf_angles_radians_.size(); ++j) {
444 float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j],
445 rpsiws_[i][j],
446 ratio_rxiw_rxim,
447 rmw_r);
448 if (tmp_mask < new_mask_[i]) {
449 new_mask_[i] = tmp_mask;
450 }
451 }
452 }
453
454 ApplyMaskTimeSmoothing();
455 EstimateTargetPresence();
456 ApplyLowFrequencyCorrection();
457 ApplyHighFrequencyCorrection();
458 ApplyMaskFrequencySmoothing();
459 ApplyMasks(input, output);
460 }
461
CalculatePostfilterMask(const ComplexMatrixF & interf_cov_mat,float rpsiw,float ratio_rxiw_rxim,float rmw_r)462 float NonlinearBeamformer::CalculatePostfilterMask(
463 const ComplexMatrixF& interf_cov_mat,
464 float rpsiw,
465 float ratio_rxiw_rxim,
466 float rmw_r) {
467 float rpsim = Norm(interf_cov_mat, eig_m_);
468
469 float ratio = 0.f;
470 if (rpsim > 0.f) {
471 ratio = rpsiw / rpsim;
472 }
473
474 return (1.f - std::min(kCutOffConstant, ratio / rmw_r)) /
475 (1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim));
476 }
477
ApplyMasks(const complex_f * const * input,complex_f * const * output)478 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
479 complex_f* const* output) {
480 complex_f* output_channel = output[0];
481 for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
482 output_channel[f_ix] = complex_f(0.f, 0.f);
483
484 const complex_f* delay_sum_mask_els =
485 normalized_delay_sum_masks_[f_ix].elements()[0];
486 for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
487 output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
488 }
489
490 output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
491 }
492 }
493
494 // Smooth new_mask_ into time_smooth_mask_.
ApplyMaskTimeSmoothing()495 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
496 for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
497 time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
498 (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
499 }
500 }
501
502 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
ApplyMaskFrequencySmoothing()503 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
504 // Smooth over frequency in both directions. The "frequency correction"
505 // regions have constant value, but we enter them to smooth over the jump
506 // that exists at the boundary. However, this does mean when smoothing "away"
507 // from the region that we only need to use the last element.
508 //
509 // Upward smoothing:
510 // low_mean_start_bin_
511 // v
512 // |------|------------|------|
513 // ^------------------>^
514 //
515 // Downward smoothing:
516 // high_mean_end_bin_
517 // v
518 // |------|------------|------|
519 // ^<------------------^
520 std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_);
521 for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) {
522 final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
523 (1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1];
524 }
525 for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) {
526 final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] +
527 (1 - kMaskFrequencySmoothAlpha) * final_mask_[i];
528 }
529 }
530
531 // Apply low frequency correction to time_smooth_mask_.
ApplyLowFrequencyCorrection()532 void NonlinearBeamformer::ApplyLowFrequencyCorrection() {
533 const float low_frequency_mask =
534 MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1);
535 std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_,
536 low_frequency_mask);
537 }
538
539 // Apply high frequency correction to time_smooth_mask_. Update
540 // high_pass_postfilter_mask_ to use for the high frequency time-domain bands.
ApplyHighFrequencyCorrection()541 void NonlinearBeamformer::ApplyHighFrequencyCorrection() {
542 high_pass_postfilter_mask_ =
543 MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1);
544 std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1,
545 time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_);
546 }
547
548 // Compute mean over the given range of time_smooth_mask_, [first, last).
MaskRangeMean(size_t first,size_t last)549 float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) {
550 RTC_DCHECK_GT(last, first);
551 const float sum = std::accumulate(time_smooth_mask_ + first,
552 time_smooth_mask_ + last, 0.f);
553 return sum / (last - first);
554 }
555
EstimateTargetPresence()556 void NonlinearBeamformer::EstimateTargetPresence() {
557 const size_t quantile = static_cast<size_t>(
558 (high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile +
559 low_mean_start_bin_);
560 std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile,
561 new_mask_ + high_mean_end_bin_ + 1);
562 if (new_mask_[quantile] > kMaskTargetThreshold) {
563 is_target_present_ = true;
564 interference_blocks_count_ = 0;
565 } else {
566 is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
567 }
568 }
569
570 } // namespace webrtc
571