• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h"
12 
13 #include <algorithm>
14 #include <cmath>
15 #include <cstddef>
16 
17 #include "rtc_base/checks.h"
18 
19 namespace webrtc {
20 namespace rnn_vad {
21 namespace {
22 
23 // Weights for each FFT coefficient for each Opus band (Nyquist frequency
24 // excluded). The size of each band is specified in
25 // |kOpusScaleNumBins24kHz20ms|.
26 constexpr std::array<float, kFrameSize20ms24kHz / 2> kOpusBandWeights24kHz20ms =
27     {{
28         0.f,       0.25f,      0.5f,       0.75f,  // Band 0
29         0.f,       0.25f,      0.5f,       0.75f,  // Band 1
30         0.f,       0.25f,      0.5f,       0.75f,  // Band 2
31         0.f,       0.25f,      0.5f,       0.75f,  // Band 3
32         0.f,       0.25f,      0.5f,       0.75f,  // Band 4
33         0.f,       0.25f,      0.5f,       0.75f,  // Band 5
34         0.f,       0.25f,      0.5f,       0.75f,  // Band 6
35         0.f,       0.25f,      0.5f,       0.75f,  // Band 7
36         0.f,       0.125f,     0.25f,      0.375f,    0.5f,
37         0.625f,    0.75f,      0.875f,  // Band 8
38         0.f,       0.125f,     0.25f,      0.375f,    0.5f,
39         0.625f,    0.75f,      0.875f,  // Band 9
40         0.f,       0.125f,     0.25f,      0.375f,    0.5f,
41         0.625f,    0.75f,      0.875f,  // Band 10
42         0.f,       0.125f,     0.25f,      0.375f,    0.5f,
43         0.625f,    0.75f,      0.875f,  // Band 11
44         0.f,       0.0625f,    0.125f,     0.1875f,   0.25f,
45         0.3125f,   0.375f,     0.4375f,    0.5f,      0.5625f,
46         0.625f,    0.6875f,    0.75f,      0.8125f,   0.875f,
47         0.9375f,  // Band 12
48         0.f,       0.0625f,    0.125f,     0.1875f,   0.25f,
49         0.3125f,   0.375f,     0.4375f,    0.5f,      0.5625f,
50         0.625f,    0.6875f,    0.75f,      0.8125f,   0.875f,
51         0.9375f,  // Band 13
52         0.f,       0.0625f,    0.125f,     0.1875f,   0.25f,
53         0.3125f,   0.375f,     0.4375f,    0.5f,      0.5625f,
54         0.625f,    0.6875f,    0.75f,      0.8125f,   0.875f,
55         0.9375f,  // Band 14
56         0.f,       0.0416667f, 0.0833333f, 0.125f,    0.166667f,
57         0.208333f, 0.25f,      0.291667f,  0.333333f, 0.375f,
58         0.416667f, 0.458333f,  0.5f,       0.541667f, 0.583333f,
59         0.625f,    0.666667f,  0.708333f,  0.75f,     0.791667f,
60         0.833333f, 0.875f,     0.916667f,  0.958333f,  // Band 15
61         0.f,       0.0416667f, 0.0833333f, 0.125f,    0.166667f,
62         0.208333f, 0.25f,      0.291667f,  0.333333f, 0.375f,
63         0.416667f, 0.458333f,  0.5f,       0.541667f, 0.583333f,
64         0.625f,    0.666667f,  0.708333f,  0.75f,     0.791667f,
65         0.833333f, 0.875f,     0.916667f,  0.958333f,  // Band 16
66         0.f,       0.03125f,   0.0625f,    0.09375f,  0.125f,
67         0.15625f,  0.1875f,    0.21875f,   0.25f,     0.28125f,
68         0.3125f,   0.34375f,   0.375f,     0.40625f,  0.4375f,
69         0.46875f,  0.5f,       0.53125f,   0.5625f,   0.59375f,
70         0.625f,    0.65625f,   0.6875f,    0.71875f,  0.75f,
71         0.78125f,  0.8125f,    0.84375f,   0.875f,    0.90625f,
72         0.9375f,   0.96875f,  // Band 17
73         0.f,       0.0208333f, 0.0416667f, 0.0625f,   0.0833333f,
74         0.104167f, 0.125f,     0.145833f,  0.166667f, 0.1875f,
75         0.208333f, 0.229167f,  0.25f,      0.270833f, 0.291667f,
76         0.3125f,   0.333333f,  0.354167f,  0.375f,    0.395833f,
77         0.416667f, 0.4375f,    0.458333f,  0.479167f, 0.5f,
78         0.520833f, 0.541667f,  0.5625f,    0.583333f, 0.604167f,
79         0.625f,    0.645833f,  0.666667f,  0.6875f,   0.708333f,
80         0.729167f, 0.75f,      0.770833f,  0.791667f, 0.8125f,
81         0.833333f, 0.854167f,  0.875f,     0.895833f, 0.916667f,
82         0.9375f,   0.958333f,  0.979167f  // Band 18
83     }};
84 
85 }  // namespace
86 
SpectralCorrelator()87 SpectralCorrelator::SpectralCorrelator()
88     : weights_(kOpusBandWeights24kHz20ms.begin(),
89                kOpusBandWeights24kHz20ms.end()) {}
90 
91 SpectralCorrelator::~SpectralCorrelator() = default;
92 
ComputeAutoCorrelation(rtc::ArrayView<const float> x,rtc::ArrayView<float,kOpusBands24kHz> auto_corr) const93 void SpectralCorrelator::ComputeAutoCorrelation(
94     rtc::ArrayView<const float> x,
95     rtc::ArrayView<float, kOpusBands24kHz> auto_corr) const {
96   ComputeCrossCorrelation(x, x, auto_corr);
97 }
98 
ComputeCrossCorrelation(rtc::ArrayView<const float> x,rtc::ArrayView<const float> y,rtc::ArrayView<float,kOpusBands24kHz> cross_corr) const99 void SpectralCorrelator::ComputeCrossCorrelation(
100     rtc::ArrayView<const float> x,
101     rtc::ArrayView<const float> y,
102     rtc::ArrayView<float, kOpusBands24kHz> cross_corr) const {
103   RTC_DCHECK_EQ(x.size(), kFrameSize20ms24kHz);
104   RTC_DCHECK_EQ(x.size(), y.size());
105   RTC_DCHECK_EQ(x[1], 0.f) << "The Nyquist coefficient must be zeroed.";
106   RTC_DCHECK_EQ(y[1], 0.f) << "The Nyquist coefficient must be zeroed.";
107   constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms();
108   size_t k = 0;  // Next Fourier coefficient index.
109   cross_corr[0] = 0.f;
110   for (size_t i = 0; i < kOpusBands24kHz - 1; ++i) {
111     cross_corr[i + 1] = 0.f;
112     for (int j = 0; j < kOpusScaleNumBins24kHz20ms[i]; ++j) {  // Band size.
113       const float v = x[2 * k] * y[2 * k] + x[2 * k + 1] * y[2 * k + 1];
114       const float tmp = weights_[k] * v;
115       cross_corr[i] += v - tmp;
116       cross_corr[i + 1] += tmp;
117       k++;
118     }
119   }
120   cross_corr[0] *= 2.f;  // The first band only gets half contribution.
121   RTC_DCHECK_EQ(k, kFrameSize20ms24kHz / 2);  // Nyquist coefficient never used.
122 }
123 
ComputeSmoothedLogMagnitudeSpectrum(rtc::ArrayView<const float> bands_energy,rtc::ArrayView<float,kNumBands> log_bands_energy)124 void ComputeSmoothedLogMagnitudeSpectrum(
125     rtc::ArrayView<const float> bands_energy,
126     rtc::ArrayView<float, kNumBands> log_bands_energy) {
127   RTC_DCHECK_LE(bands_energy.size(), kNumBands);
128   constexpr float kOneByHundred = 1e-2f;
129   constexpr float kLogOneByHundred = -2.f;
130   // Init.
131   float log_max = kLogOneByHundred;
132   float follow = kLogOneByHundred;
133   const auto smooth = [&log_max, &follow](float x) {
134     x = std::max(log_max - 7.f, std::max(follow - 1.5f, x));
135     log_max = std::max(log_max, x);
136     follow = std::max(follow - 1.5f, x);
137     return x;
138   };
139   // Smoothing over the bands for which the band energy is defined.
140   for (size_t i = 0; i < bands_energy.size(); ++i) {
141     log_bands_energy[i] = smooth(std::log10(kOneByHundred + bands_energy[i]));
142   }
143   // Smoothing over the remaining bands (zero energy).
144   for (size_t i = bands_energy.size(); i < kNumBands; ++i) {
145     log_bands_energy[i] = smooth(kLogOneByHundred);
146   }
147 }
148 
ComputeDctTable()149 std::array<float, kNumBands * kNumBands> ComputeDctTable() {
150   std::array<float, kNumBands * kNumBands> dct_table;
151   const double k = std::sqrt(0.5);
152   for (size_t i = 0; i < kNumBands; ++i) {
153     for (size_t j = 0; j < kNumBands; ++j)
154       dct_table[i * kNumBands + j] = std::cos((i + 0.5) * j * kPi / kNumBands);
155     dct_table[i * kNumBands] *= k;
156   }
157   return dct_table;
158 }
159 
ComputeDct(rtc::ArrayView<const float> in,rtc::ArrayView<const float,kNumBands * kNumBands> dct_table,rtc::ArrayView<float> out)160 void ComputeDct(rtc::ArrayView<const float> in,
161                 rtc::ArrayView<const float, kNumBands * kNumBands> dct_table,
162                 rtc::ArrayView<float> out) {
163   // DCT scaling factor - i.e., sqrt(2 / kNumBands).
164   constexpr float kDctScalingFactor = 0.301511345f;
165   constexpr float kDctScalingFactorError =
166       kDctScalingFactor * kDctScalingFactor -
167       2.f / static_cast<float>(kNumBands);
168   static_assert(
169       (kDctScalingFactorError >= 0.f && kDctScalingFactorError < 1e-1f) ||
170           (kDctScalingFactorError < 0.f && kDctScalingFactorError > -1e-1f),
171       "kNumBands changed and kDctScalingFactor has not been updated.");
172   RTC_DCHECK_NE(in.data(), out.data()) << "In-place DCT is not supported.";
173   RTC_DCHECK_LE(in.size(), kNumBands);
174   RTC_DCHECK_LE(1, out.size());
175   RTC_DCHECK_LE(out.size(), in.size());
176   for (size_t i = 0; i < out.size(); ++i) {
177     out[i] = 0.f;
178     for (size_t j = 0; j < in.size(); ++j) {
179       out[i] += in[j] * dct_table[j * kNumBands + i];
180     }
181     // TODO(bugs.webrtc.org/10480): Scaling factor in the DCT table.
182     out[i] *= kDctScalingFactor;
183   }
184 }
185 
186 }  // namespace rnn_vad
187 }  // namespace webrtc
188