• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #define _USE_MATH_DEFINES
12 
13 #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
14 
15 #include <algorithm>
16 #include <cmath>
17 #include <numeric>
18 #include <vector>
19 
20 #include "webrtc/base/arraysize.h"
21 #include "webrtc/common_audio/window_generator.h"
22 #include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"
23 
24 namespace webrtc {
25 namespace {
26 
27 // Alpha for the Kaiser Bessel Derived window.
28 const float kKbdAlpha = 1.5f;
29 
30 const float kSpeedOfSoundMeterSeconds = 343;
31 
32 // The minimum separation in radians between the target direction and an
33 // interferer scenario.
34 const float kMinAwayRadians = 0.2f;
35 
36 // The separation between the target direction and the closest interferer
37 // scenario is proportional to this constant.
38 const float kAwaySlope = 0.008f;
39 
40 // When calculating the interference covariance matrix, this is the weight for
41 // the weighted average between the uniform covariance matrix and the angled
42 // covariance matrix.
43 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
44 const float kBalance = 0.95f;
45 
46 // Alpha coefficients for mask smoothing.
47 const float kMaskTimeSmoothAlpha = 0.2f;
48 const float kMaskFrequencySmoothAlpha = 0.6f;
49 
50 // The average mask is computed from masks in this mid-frequency range. If these
51 // ranges are changed |kMaskQuantile| might need to be adjusted.
52 const int kLowMeanStartHz = 200;
53 const int kLowMeanEndHz = 400;
54 
55 // Range limiter for subtractive terms in the nominator and denominator of the
56 // postfilter expression. It handles the scenario mismatch between the true and
57 // model sources (target and interference).
58 const float kCutOffConstant = 0.9999f;
59 
60 // Quantile of mask values which is used to estimate target presence.
61 const float kMaskQuantile = 0.7f;
62 // Mask threshold over which the data is considered signal and not interference.
63 // It has to be updated every time the postfilter calculation is changed
64 // significantly.
65 // TODO(aluebs): Write a tool to tune the target threshold automatically based
66 // on files annotated with target and interference ground truth.
67 const float kMaskTargetThreshold = 0.01f;
68 // Time in seconds after which the data is considered interference if the mask
69 // does not pass |kMaskTargetThreshold|.
70 const float kHoldTargetSeconds = 0.25f;
71 
72 // To compensate for the attenuation this algorithm introduces to the target
73 // signal. It was estimated empirically from a low-noise low-reverberation
74 // recording from broadside.
75 const float kCompensationGain = 2.f;
76 
77 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
78 // used; to accomplish this, we compute both multiplications in the same loop.
79 // The returned norm is clamped to be non-negative.
Norm(const ComplexMatrix<float> & mat,const ComplexMatrix<float> & norm_mat)80 float Norm(const ComplexMatrix<float>& mat,
81            const ComplexMatrix<float>& norm_mat) {
82   RTC_CHECK_EQ(1u, norm_mat.num_rows());
83   RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows());
84   RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns());
85 
86   complex<float> first_product = complex<float>(0.f, 0.f);
87   complex<float> second_product = complex<float>(0.f, 0.f);
88 
89   const complex<float>* const* mat_els = mat.elements();
90   const complex<float>* const* norm_mat_els = norm_mat.elements();
91 
92   for (size_t i = 0; i < norm_mat.num_columns(); ++i) {
93     for (size_t j = 0; j < norm_mat.num_columns(); ++j) {
94       first_product += conj(norm_mat_els[0][j]) * mat_els[j][i];
95     }
96     second_product += first_product * norm_mat_els[0][i];
97     first_product = 0.f;
98   }
99   return std::max(second_product.real(), 0.f);
100 }
101 
102 // Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|.
ConjugateDotProduct(const ComplexMatrix<float> & lhs,const ComplexMatrix<float> & rhs)103 complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs,
104                                    const ComplexMatrix<float>& rhs) {
105   RTC_CHECK_EQ(1u, lhs.num_rows());
106   RTC_CHECK_EQ(1u, rhs.num_rows());
107   RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns());
108 
109   const complex<float>* const* lhs_elements = lhs.elements();
110   const complex<float>* const* rhs_elements = rhs.elements();
111 
112   complex<float> result = complex<float>(0.f, 0.f);
113   for (size_t i = 0; i < lhs.num_columns(); ++i) {
114     result += conj(lhs_elements[0][i]) * rhs_elements[0][i];
115   }
116 
117   return result;
118 }
119 
120 // Works for positive numbers only.
Round(float x)121 size_t Round(float x) {
122   return static_cast<size_t>(std::floor(x + 0.5f));
123 }
124 
125 // Calculates the sum of absolute values of a complex matrix.
SumAbs(const ComplexMatrix<float> & mat)126 float SumAbs(const ComplexMatrix<float>& mat) {
127   float sum_abs = 0.f;
128   const complex<float>* const* mat_els = mat.elements();
129   for (size_t i = 0; i < mat.num_rows(); ++i) {
130     for (size_t j = 0; j < mat.num_columns(); ++j) {
131       sum_abs += std::abs(mat_els[i][j]);
132     }
133   }
134   return sum_abs;
135 }
136 
137 // Calculates the sum of squares of a complex matrix.
SumSquares(const ComplexMatrix<float> & mat)138 float SumSquares(const ComplexMatrix<float>& mat) {
139   float sum_squares = 0.f;
140   const complex<float>* const* mat_els = mat.elements();
141   for (size_t i = 0; i < mat.num_rows(); ++i) {
142     for (size_t j = 0; j < mat.num_columns(); ++j) {
143       float abs_value = std::abs(mat_els[i][j]);
144       sum_squares += abs_value * abs_value;
145     }
146   }
147   return sum_squares;
148 }
149 
150 // Does |out| = |in|.' * conj(|in|) for row vector |in|.
TransposedConjugatedProduct(const ComplexMatrix<float> & in,ComplexMatrix<float> * out)151 void TransposedConjugatedProduct(const ComplexMatrix<float>& in,
152                                  ComplexMatrix<float>* out) {
153   RTC_CHECK_EQ(1u, in.num_rows());
154   RTC_CHECK_EQ(out->num_rows(), in.num_columns());
155   RTC_CHECK_EQ(out->num_columns(), in.num_columns());
156   const complex<float>* in_elements = in.elements()[0];
157   complex<float>* const* out_elements = out->elements();
158   for (size_t i = 0; i < out->num_rows(); ++i) {
159     for (size_t j = 0; j < out->num_columns(); ++j) {
160       out_elements[i][j] = in_elements[i] * conj(in_elements[j]);
161     }
162   }
163 }
164 
GetCenteredArray(std::vector<Point> array_geometry)165 std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
166   for (size_t dim = 0; dim < 3; ++dim) {
167     float center = 0.f;
168     for (size_t i = 0; i < array_geometry.size(); ++i) {
169       center += array_geometry[i].c[dim];
170     }
171     center /= array_geometry.size();
172     for (size_t i = 0; i < array_geometry.size(); ++i) {
173       array_geometry[i].c[dim] -= center;
174     }
175   }
176   return array_geometry;
177 }
178 
179 }  // namespace
180 
181 const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
182 
183 // static
184 const size_t NonlinearBeamformer::kNumFreqBins;
185 
NonlinearBeamformer(const std::vector<Point> & array_geometry,SphericalPointf target_direction)186 NonlinearBeamformer::NonlinearBeamformer(
187     const std::vector<Point>& array_geometry,
188     SphericalPointf target_direction)
189     : num_input_channels_(array_geometry.size()),
190       array_geometry_(GetCenteredArray(array_geometry)),
191       array_normal_(GetArrayNormalIfExists(array_geometry)),
192       min_mic_spacing_(GetMinimumSpacing(array_geometry)),
193       target_angle_radians_(target_direction.azimuth()),
194       away_radians_(std::min(
195           static_cast<float>(M_PI),
196           std::max(kMinAwayRadians,
197                    kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {
198   WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
199 }
200 
Initialize(int chunk_size_ms,int sample_rate_hz)201 void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
202   chunk_length_ =
203       static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
204   sample_rate_hz_ = sample_rate_hz;
205 
206   high_pass_postfilter_mask_ = 1.f;
207   is_target_present_ = false;
208   hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
209   interference_blocks_count_ = hold_target_blocks_;
210 
211   lapped_transform_.reset(new LappedTransform(num_input_channels_,
212                                               1,
213                                               chunk_length_,
214                                               window_,
215                                               kFftSize,
216                                               kFftSize / 2,
217                                               this));
218   for (size_t i = 0; i < kNumFreqBins; ++i) {
219     time_smooth_mask_[i] = 1.f;
220     final_mask_[i] = 1.f;
221     float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
222     wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
223   }
224 
225   InitLowFrequencyCorrectionRanges();
226   InitDiffuseCovMats();
227   AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
228 }
229 
230 // These bin indexes determine the regions over which a mean is taken. This is
231 // applied as a constant value over the adjacent end "frequency correction"
232 // regions.
233 //
234 //             low_mean_start_bin_     high_mean_start_bin_
235 //                   v                         v              constant
236 // |----------------|--------|----------------|-------|----------------|
237 //   constant               ^                        ^
238 //             low_mean_end_bin_       high_mean_end_bin_
239 //
InitLowFrequencyCorrectionRanges()240 void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() {
241   low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
242   low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
243 
244   RTC_DCHECK_GT(low_mean_start_bin_, 0U);
245   RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
246 }
247 
InitHighFrequencyCorrectionRanges()248 void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {
249   const float kAliasingFreqHz =
250       kSpeedOfSoundMeterSeconds /
251       (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));
252   const float kHighMeanStartHz = std::min(0.5f *  kAliasingFreqHz,
253                                           sample_rate_hz_ / 2.f);
254   const float kHighMeanEndHz = std::min(0.75f *  kAliasingFreqHz,
255                                         sample_rate_hz_ / 2.f);
256   high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
257   high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
258 
259   RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
260   RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
261   RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
262 }
263 
InitInterfAngles()264 void NonlinearBeamformer::InitInterfAngles() {
265   interf_angles_radians_.clear();
266   const Point target_direction = AzimuthToPoint(target_angle_radians_);
267   const Point clockwise_interf_direction =
268       AzimuthToPoint(target_angle_radians_ - away_radians_);
269   if (!array_normal_ ||
270       DotProduct(*array_normal_, target_direction) *
271               DotProduct(*array_normal_, clockwise_interf_direction) >=
272           0.f) {
273     // The target and clockwise interferer are in the same half-plane defined
274     // by the array.
275     interf_angles_radians_.push_back(target_angle_radians_ - away_radians_);
276   } else {
277     // Otherwise, the interferer will begin reflecting back at the target.
278     // Instead rotate it away 180 degrees.
279     interf_angles_radians_.push_back(target_angle_radians_ - away_radians_ +
280                                      M_PI);
281   }
282   const Point counterclock_interf_direction =
283       AzimuthToPoint(target_angle_radians_ + away_radians_);
284   if (!array_normal_ ||
285       DotProduct(*array_normal_, target_direction) *
286               DotProduct(*array_normal_, counterclock_interf_direction) >=
287           0.f) {
288     // The target and counter-clockwise interferer are in the same half-plane
289     // defined by the array.
290     interf_angles_radians_.push_back(target_angle_radians_ + away_radians_);
291   } else {
292     // Otherwise, the interferer will begin reflecting back at the target.
293     // Instead rotate it away 180 degrees.
294     interf_angles_radians_.push_back(target_angle_radians_ + away_radians_ -
295                                      M_PI);
296   }
297 }
298 
InitDelaySumMasks()299 void NonlinearBeamformer::InitDelaySumMasks() {
300   for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
301     delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
302     CovarianceMatrixGenerator::PhaseAlignmentMasks(
303         f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,
304         array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);
305 
306     complex_f norm_factor = sqrt(
307         ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
308     delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
309     normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
310     normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
311         normalized_delay_sum_masks_[f_ix]));
312   }
313 }
314 
InitTargetCovMats()315 void NonlinearBeamformer::InitTargetCovMats() {
316   for (size_t i = 0; i < kNumFreqBins; ++i) {
317     target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
318     TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
319   }
320 }
321 
InitDiffuseCovMats()322 void NonlinearBeamformer::InitDiffuseCovMats() {
323   for (size_t i = 0; i < kNumFreqBins; ++i) {
324     uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_);
325     CovarianceMatrixGenerator::UniformCovarianceMatrix(
326         wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);
327     complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0];
328     uniform_cov_mat_[i].Scale(1.f / normalization_factor);
329     uniform_cov_mat_[i].Scale(1 - kBalance);
330   }
331 }
332 
InitInterfCovMats()333 void NonlinearBeamformer::InitInterfCovMats() {
334   for (size_t i = 0; i < kNumFreqBins; ++i) {
335     interf_cov_mats_[i].clear();
336     for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
337       interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,
338                                                        num_input_channels_));
339       ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
340       CovarianceMatrixGenerator::AngledCovarianceMatrix(
341           kSpeedOfSoundMeterSeconds,
342           interf_angles_radians_[j],
343           i,
344           kFftSize,
345           kNumFreqBins,
346           sample_rate_hz_,
347           array_geometry_,
348           &angled_cov_mat);
349       // Normalize matrices before averaging them.
350       complex_f normalization_factor = angled_cov_mat.elements()[0][0];
351       angled_cov_mat.Scale(1.f / normalization_factor);
352       // Weighted average of matrices.
353       angled_cov_mat.Scale(kBalance);
354       interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat);
355     }
356   }
357 }
358 
NormalizeCovMats()359 void NonlinearBeamformer::NormalizeCovMats() {
360   for (size_t i = 0; i < kNumFreqBins; ++i) {
361     rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
362     rpsiws_[i].clear();
363     for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
364       rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
365     }
366   }
367 }
368 
ProcessChunk(const ChannelBuffer<float> & input,ChannelBuffer<float> * output)369 void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
370                                        ChannelBuffer<float>* output) {
371   RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
372   RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
373 
374   float old_high_pass_mask = high_pass_postfilter_mask_;
375   lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
376   // Ramp up/down for smoothing. 1 mask per 10ms results in audible
377   // discontinuities.
378   const float ramp_increment =
379       (high_pass_postfilter_mask_ - old_high_pass_mask) /
380       input.num_frames_per_band();
381   // Apply the smoothed high-pass mask to the first channel of each band.
382   // This can be done because the effect of the linear beamformer is negligible
383   // compared to the post-filter.
384   for (size_t i = 1; i < input.num_bands(); ++i) {
385     float smoothed_mask = old_high_pass_mask;
386     for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
387       smoothed_mask += ramp_increment;
388       output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
389     }
390   }
391 }
392 
AimAt(const SphericalPointf & target_direction)393 void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
394   target_angle_radians_ = target_direction.azimuth();
395   InitHighFrequencyCorrectionRanges();
396   InitInterfAngles();
397   InitDelaySumMasks();
398   InitTargetCovMats();
399   InitInterfCovMats();
400   NormalizeCovMats();
401 }
402 
IsInBeam(const SphericalPointf & spherical_point)403 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
404   // If more than half-beamwidth degrees away from the beam's center,
405   // you are out of the beam.
406   return fabs(spherical_point.azimuth() - target_angle_radians_) <
407          kHalfBeamWidthRadians;
408 }
409 
ProcessAudioBlock(const complex_f * const * input,size_t num_input_channels,size_t num_freq_bins,size_t num_output_channels,complex_f * const * output)410 void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
411                                             size_t num_input_channels,
412                                             size_t num_freq_bins,
413                                             size_t num_output_channels,
414                                             complex_f* const* output) {
415   RTC_CHECK_EQ(kNumFreqBins, num_freq_bins);
416   RTC_CHECK_EQ(num_input_channels_, num_input_channels);
417   RTC_CHECK_EQ(1u, num_output_channels);
418 
419   // Calculating the post-filter masks. Note that we need two for each
420   // frequency bin to account for the positive and negative interferer
421   // angle.
422   for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
423     eig_m_.CopyFromColumn(input, i, num_input_channels_);
424     float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
425     if (eig_m_norm_factor != 0.f) {
426       eig_m_.Scale(1.f / eig_m_norm_factor);
427     }
428 
429     float rxim = Norm(target_cov_mats_[i], eig_m_);
430     float ratio_rxiw_rxim = 0.f;
431     if (rxim > 0.f) {
432       ratio_rxiw_rxim = rxiws_[i] / rxim;
433     }
434 
435     complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));
436     rmw *= rmw;
437     float rmw_r = rmw.real();
438 
439     new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0],
440                                            rpsiws_[i][0],
441                                            ratio_rxiw_rxim,
442                                            rmw_r);
443     for (size_t j = 1; j < interf_angles_radians_.size(); ++j) {
444       float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j],
445                                                rpsiws_[i][j],
446                                                ratio_rxiw_rxim,
447                                                rmw_r);
448       if (tmp_mask < new_mask_[i]) {
449         new_mask_[i] = tmp_mask;
450       }
451     }
452   }
453 
454   ApplyMaskTimeSmoothing();
455   EstimateTargetPresence();
456   ApplyLowFrequencyCorrection();
457   ApplyHighFrequencyCorrection();
458   ApplyMaskFrequencySmoothing();
459   ApplyMasks(input, output);
460 }
461 
CalculatePostfilterMask(const ComplexMatrixF & interf_cov_mat,float rpsiw,float ratio_rxiw_rxim,float rmw_r)462 float NonlinearBeamformer::CalculatePostfilterMask(
463     const ComplexMatrixF& interf_cov_mat,
464     float rpsiw,
465     float ratio_rxiw_rxim,
466     float rmw_r) {
467   float rpsim = Norm(interf_cov_mat, eig_m_);
468 
469   float ratio = 0.f;
470   if (rpsim > 0.f) {
471     ratio = rpsiw / rpsim;
472   }
473 
474   return (1.f - std::min(kCutOffConstant, ratio / rmw_r)) /
475          (1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim));
476 }
477 
ApplyMasks(const complex_f * const * input,complex_f * const * output)478 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
479                                      complex_f* const* output) {
480   complex_f* output_channel = output[0];
481   for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
482     output_channel[f_ix] = complex_f(0.f, 0.f);
483 
484     const complex_f* delay_sum_mask_els =
485         normalized_delay_sum_masks_[f_ix].elements()[0];
486     for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
487       output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
488     }
489 
490     output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
491   }
492 }
493 
494 // Smooth new_mask_ into time_smooth_mask_.
ApplyMaskTimeSmoothing()495 void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
496   for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
497     time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
498                            (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
499   }
500 }
501 
502 // Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
ApplyMaskFrequencySmoothing()503 void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
504   // Smooth over frequency in both directions. The "frequency correction"
505   // regions have constant value, but we enter them to smooth over the jump
506   // that exists at the boundary. However, this does mean when smoothing "away"
507   // from the region that we only need to use the last element.
508   //
509   // Upward smoothing:
510   //   low_mean_start_bin_
511   //         v
512   // |------|------------|------|
513   //       ^------------------>^
514   //
515   // Downward smoothing:
516   //         high_mean_end_bin_
517   //                    v
518   // |------|------------|------|
519   //  ^<------------------^
520   std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_);
521   for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) {
522     final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
523                      (1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1];
524   }
525   for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) {
526     final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] +
527                          (1 - kMaskFrequencySmoothAlpha) * final_mask_[i];
528   }
529 }
530 
531 // Apply low frequency correction to time_smooth_mask_.
ApplyLowFrequencyCorrection()532 void NonlinearBeamformer::ApplyLowFrequencyCorrection() {
533   const float low_frequency_mask =
534       MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1);
535   std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_,
536             low_frequency_mask);
537 }
538 
539 // Apply high frequency correction to time_smooth_mask_. Update
540 // high_pass_postfilter_mask_ to use for the high frequency time-domain bands.
ApplyHighFrequencyCorrection()541 void NonlinearBeamformer::ApplyHighFrequencyCorrection() {
542   high_pass_postfilter_mask_ =
543       MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1);
544   std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1,
545             time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_);
546 }
547 
548 // Compute mean over the given range of time_smooth_mask_, [first, last).
MaskRangeMean(size_t first,size_t last)549 float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) {
550   RTC_DCHECK_GT(last, first);
551   const float sum = std::accumulate(time_smooth_mask_ + first,
552                                     time_smooth_mask_ + last, 0.f);
553   return sum / (last - first);
554 }
555 
EstimateTargetPresence()556 void NonlinearBeamformer::EstimateTargetPresence() {
557   const size_t quantile = static_cast<size_t>(
558       (high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile +
559       low_mean_start_bin_);
560   std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile,
561                    new_mask_ + high_mean_end_bin_ + 1);
562   if (new_mask_[quantile] > kMaskTargetThreshold) {
563     is_target_present_ = true;
564     interference_blocks_count_ = 0;
565   } else {
566     is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
567   }
568 }
569 
570 }  // namespace webrtc
571