1 /*
2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/agc2/rnn_vad/spectral_features_internal.h"
12
13 #include <algorithm>
14 #include <cmath>
15 #include <cstddef>
16
17 #include "rtc_base/checks.h"
18
19 namespace webrtc {
20 namespace rnn_vad {
21 namespace {
22
23 // Weights for each FFT coefficient for each Opus band (Nyquist frequency
24 // excluded). The size of each band is specified in
25 // |kOpusScaleNumBins24kHz20ms|.
26 constexpr std::array<float, kFrameSize20ms24kHz / 2> kOpusBandWeights24kHz20ms =
27 {{
28 0.f, 0.25f, 0.5f, 0.75f, // Band 0
29 0.f, 0.25f, 0.5f, 0.75f, // Band 1
30 0.f, 0.25f, 0.5f, 0.75f, // Band 2
31 0.f, 0.25f, 0.5f, 0.75f, // Band 3
32 0.f, 0.25f, 0.5f, 0.75f, // Band 4
33 0.f, 0.25f, 0.5f, 0.75f, // Band 5
34 0.f, 0.25f, 0.5f, 0.75f, // Band 6
35 0.f, 0.25f, 0.5f, 0.75f, // Band 7
36 0.f, 0.125f, 0.25f, 0.375f, 0.5f,
37 0.625f, 0.75f, 0.875f, // Band 8
38 0.f, 0.125f, 0.25f, 0.375f, 0.5f,
39 0.625f, 0.75f, 0.875f, // Band 9
40 0.f, 0.125f, 0.25f, 0.375f, 0.5f,
41 0.625f, 0.75f, 0.875f, // Band 10
42 0.f, 0.125f, 0.25f, 0.375f, 0.5f,
43 0.625f, 0.75f, 0.875f, // Band 11
44 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f,
45 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f,
46 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f,
47 0.9375f, // Band 12
48 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f,
49 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f,
50 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f,
51 0.9375f, // Band 13
52 0.f, 0.0625f, 0.125f, 0.1875f, 0.25f,
53 0.3125f, 0.375f, 0.4375f, 0.5f, 0.5625f,
54 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f,
55 0.9375f, // Band 14
56 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f,
57 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f,
58 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f,
59 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f,
60 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 15
61 0.f, 0.0416667f, 0.0833333f, 0.125f, 0.166667f,
62 0.208333f, 0.25f, 0.291667f, 0.333333f, 0.375f,
63 0.416667f, 0.458333f, 0.5f, 0.541667f, 0.583333f,
64 0.625f, 0.666667f, 0.708333f, 0.75f, 0.791667f,
65 0.833333f, 0.875f, 0.916667f, 0.958333f, // Band 16
66 0.f, 0.03125f, 0.0625f, 0.09375f, 0.125f,
67 0.15625f, 0.1875f, 0.21875f, 0.25f, 0.28125f,
68 0.3125f, 0.34375f, 0.375f, 0.40625f, 0.4375f,
69 0.46875f, 0.5f, 0.53125f, 0.5625f, 0.59375f,
70 0.625f, 0.65625f, 0.6875f, 0.71875f, 0.75f,
71 0.78125f, 0.8125f, 0.84375f, 0.875f, 0.90625f,
72 0.9375f, 0.96875f, // Band 17
73 0.f, 0.0208333f, 0.0416667f, 0.0625f, 0.0833333f,
74 0.104167f, 0.125f, 0.145833f, 0.166667f, 0.1875f,
75 0.208333f, 0.229167f, 0.25f, 0.270833f, 0.291667f,
76 0.3125f, 0.333333f, 0.354167f, 0.375f, 0.395833f,
77 0.416667f, 0.4375f, 0.458333f, 0.479167f, 0.5f,
78 0.520833f, 0.541667f, 0.5625f, 0.583333f, 0.604167f,
79 0.625f, 0.645833f, 0.666667f, 0.6875f, 0.708333f,
80 0.729167f, 0.75f, 0.770833f, 0.791667f, 0.8125f,
81 0.833333f, 0.854167f, 0.875f, 0.895833f, 0.916667f,
82 0.9375f, 0.958333f, 0.979167f // Band 18
83 }};
84
85 } // namespace
86
SpectralCorrelator()87 SpectralCorrelator::SpectralCorrelator()
88 : weights_(kOpusBandWeights24kHz20ms.begin(),
89 kOpusBandWeights24kHz20ms.end()) {}
90
91 SpectralCorrelator::~SpectralCorrelator() = default;
92
ComputeAutoCorrelation(rtc::ArrayView<const float> x,rtc::ArrayView<float,kOpusBands24kHz> auto_corr) const93 void SpectralCorrelator::ComputeAutoCorrelation(
94 rtc::ArrayView<const float> x,
95 rtc::ArrayView<float, kOpusBands24kHz> auto_corr) const {
96 ComputeCrossCorrelation(x, x, auto_corr);
97 }
98
ComputeCrossCorrelation(rtc::ArrayView<const float> x,rtc::ArrayView<const float> y,rtc::ArrayView<float,kOpusBands24kHz> cross_corr) const99 void SpectralCorrelator::ComputeCrossCorrelation(
100 rtc::ArrayView<const float> x,
101 rtc::ArrayView<const float> y,
102 rtc::ArrayView<float, kOpusBands24kHz> cross_corr) const {
103 RTC_DCHECK_EQ(x.size(), kFrameSize20ms24kHz);
104 RTC_DCHECK_EQ(x.size(), y.size());
105 RTC_DCHECK_EQ(x[1], 0.f) << "The Nyquist coefficient must be zeroed.";
106 RTC_DCHECK_EQ(y[1], 0.f) << "The Nyquist coefficient must be zeroed.";
107 constexpr auto kOpusScaleNumBins24kHz20ms = GetOpusScaleNumBins24kHz20ms();
108 size_t k = 0; // Next Fourier coefficient index.
109 cross_corr[0] = 0.f;
110 for (size_t i = 0; i < kOpusBands24kHz - 1; ++i) {
111 cross_corr[i + 1] = 0.f;
112 for (int j = 0; j < kOpusScaleNumBins24kHz20ms[i]; ++j) { // Band size.
113 const float v = x[2 * k] * y[2 * k] + x[2 * k + 1] * y[2 * k + 1];
114 const float tmp = weights_[k] * v;
115 cross_corr[i] += v - tmp;
116 cross_corr[i + 1] += tmp;
117 k++;
118 }
119 }
120 cross_corr[0] *= 2.f; // The first band only gets half contribution.
121 RTC_DCHECK_EQ(k, kFrameSize20ms24kHz / 2); // Nyquist coefficient never used.
122 }
123
ComputeSmoothedLogMagnitudeSpectrum(rtc::ArrayView<const float> bands_energy,rtc::ArrayView<float,kNumBands> log_bands_energy)124 void ComputeSmoothedLogMagnitudeSpectrum(
125 rtc::ArrayView<const float> bands_energy,
126 rtc::ArrayView<float, kNumBands> log_bands_energy) {
127 RTC_DCHECK_LE(bands_energy.size(), kNumBands);
128 constexpr float kOneByHundred = 1e-2f;
129 constexpr float kLogOneByHundred = -2.f;
130 // Init.
131 float log_max = kLogOneByHundred;
132 float follow = kLogOneByHundred;
133 const auto smooth = [&log_max, &follow](float x) {
134 x = std::max(log_max - 7.f, std::max(follow - 1.5f, x));
135 log_max = std::max(log_max, x);
136 follow = std::max(follow - 1.5f, x);
137 return x;
138 };
139 // Smoothing over the bands for which the band energy is defined.
140 for (size_t i = 0; i < bands_energy.size(); ++i) {
141 log_bands_energy[i] = smooth(std::log10(kOneByHundred + bands_energy[i]));
142 }
143 // Smoothing over the remaining bands (zero energy).
144 for (size_t i = bands_energy.size(); i < kNumBands; ++i) {
145 log_bands_energy[i] = smooth(kLogOneByHundred);
146 }
147 }
148
ComputeDctTable()149 std::array<float, kNumBands * kNumBands> ComputeDctTable() {
150 std::array<float, kNumBands * kNumBands> dct_table;
151 const double k = std::sqrt(0.5);
152 for (size_t i = 0; i < kNumBands; ++i) {
153 for (size_t j = 0; j < kNumBands; ++j)
154 dct_table[i * kNumBands + j] = std::cos((i + 0.5) * j * kPi / kNumBands);
155 dct_table[i * kNumBands] *= k;
156 }
157 return dct_table;
158 }
159
ComputeDct(rtc::ArrayView<const float> in,rtc::ArrayView<const float,kNumBands * kNumBands> dct_table,rtc::ArrayView<float> out)160 void ComputeDct(rtc::ArrayView<const float> in,
161 rtc::ArrayView<const float, kNumBands * kNumBands> dct_table,
162 rtc::ArrayView<float> out) {
163 // DCT scaling factor - i.e., sqrt(2 / kNumBands).
164 constexpr float kDctScalingFactor = 0.301511345f;
165 constexpr float kDctScalingFactorError =
166 kDctScalingFactor * kDctScalingFactor -
167 2.f / static_cast<float>(kNumBands);
168 static_assert(
169 (kDctScalingFactorError >= 0.f && kDctScalingFactorError < 1e-1f) ||
170 (kDctScalingFactorError < 0.f && kDctScalingFactorError > -1e-1f),
171 "kNumBands changed and kDctScalingFactor has not been updated.");
172 RTC_DCHECK_NE(in.data(), out.data()) << "In-place DCT is not supported.";
173 RTC_DCHECK_LE(in.size(), kNumBands);
174 RTC_DCHECK_LE(1, out.size());
175 RTC_DCHECK_LE(out.size(), in.size());
176 for (size_t i = 0; i < out.size(); ++i) {
177 out[i] = 0.f;
178 for (size_t j = 0; j < in.size(); ++j) {
179 out[i] += in[j] * dct_table[j * kNumBands + i];
180 }
181 // TODO(bugs.webrtc.org/10480): Scaling factor in the DCT table.
182 out[i] *= kDctScalingFactor;
183 }
184 }
185
186 } // namespace rnn_vad
187 } // namespace webrtc
188