1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_coding/codecs/cng/webrtc_cng.h"
12
13 #include <algorithm>
14
15 #include "common_audio/signal_processing/include/signal_processing_library.h"
16 #include "rtc_base/checks.h"
17 #include "rtc_base/numerics/safe_conversions.h"
18
19 namespace webrtc {
20
namespace {

/* Largest number of samples handled per Encode()/Generate() call; it sizes
 * the per-call stack buffers in this file. */
constexpr size_t kCngMaxOutsizeOrder = 640;

// TODO(ossu): Rename the left-over WebRtcCng according to style guide.
/* Converts reflection coefficients to LPC polynomial coefficients.
 * Defined in the second anonymous namespace of this file. */
void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a);

/* Energy table indexed by the noise-level byte of a SID payload (the first
 * byte; the decoder clamps it to 93). Laid out ten entries per row: each
 * entry is roughly 1 dB below the previous one, so every row is about one
 * tenth of the row above it. */
constexpr int32_t WebRtcCng_kDbov[94] = {
    /*  0- 9 */ 1081109975, 858756178, 682134279, 541838517, 430397633,
                341876992,  271562548, 215709799, 171344384, 136103682,
    /* 10-19 */ 108110997,  85875618,  68213428,  54183852,  43039763,
                34187699,   27156255,  21570980,  17134438,  13610368,
    /* 20-29 */ 10811100,   8587562,   6821343,   5418385,   4303976,
                3418770,    2715625,   2157098,   1713444,   1361037,
    /* 30-39 */ 1081110,    858756,    682134,    541839,    430398,
                341877,     271563,    215710,    171344,    136104,
    /* 40-49 */ 108111,     85876,     68213,     54184,     43040,
                34188,      27156,     21571,     17134,     13610,
    /* 50-59 */ 10811,      8588,      6821,      5418,      4304,
                3419,       2716,      2157,      1713,      1361,
    /* 60-69 */ 1081,       859,       682,       542,       430,
                342,        272,       216,       171,       136,
    /* 70-79 */ 108,        86,        68,        54,        43,
                34,         27,        22,        17,        14,
    /* 80-89 */ 11,         9,         7,         5,         4,
                3,          3,         2,         2,         1,
    /* 90-93 */ 1,          1,         1,         1};

/* Q15 lag window multiplied into the autocorrelation vector by the encoder
 * (its "bandwidth expansion" step), one weight per lag. */
constexpr int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = {
    32702, 32636, 32570, 32505,
    32439, 32374, 32309, 32244,
    32179, 32114, 32049, 31985};

}  // namespace
51
ComfortNoiseDecoder()52 ComfortNoiseDecoder::ComfortNoiseDecoder() {
53 /* Needed to get the right function pointers in SPLIB. */
54 Reset();
55 }
56
Reset()57 void ComfortNoiseDecoder::Reset() {
58 dec_seed_ = 7777; /* For debugging only. */
59 dec_target_energy_ = 0;
60 dec_used_energy_ = 0;
61 for (auto& c : dec_target_reflCoefs_)
62 c = 0;
63 for (auto& c : dec_used_reflCoefs_)
64 c = 0;
65 for (auto& c : dec_filtstate_)
66 c = 0;
67 for (auto& c : dec_filtstateLow_)
68 c = 0;
69 dec_order_ = 5;
70 dec_target_scale_factor_ = 0;
71 dec_used_scale_factor_ = 0;
72 }
73
UpdateSid(rtc::ArrayView<const uint8_t> sid)74 void ComfortNoiseDecoder::UpdateSid(rtc::ArrayView<const uint8_t> sid) {
75 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER];
76 int32_t targetEnergy;
77 size_t length = sid.size();
78 /* Throw away reflection coefficients of higher order than we can handle. */
79 if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1))
80 length = WEBRTC_CNG_MAX_LPC_ORDER + 1;
81
82 dec_order_ = static_cast<uint16_t>(length - 1);
83
84 uint8_t sid0 = std::min<uint8_t>(sid[0], 93);
85 targetEnergy = WebRtcCng_kDbov[sid0];
86 /* Take down target energy to 75%. */
87 targetEnergy = targetEnergy >> 1;
88 targetEnergy += targetEnergy >> 2;
89
90 dec_target_energy_ = targetEnergy;
91
92 /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */
93 if (dec_order_ == WEBRTC_CNG_MAX_LPC_ORDER) {
94 for (size_t i = 0; i < (dec_order_); i++) {
95 refCs[i] = sid[i + 1] << 8; /* Q7 to Q15*/
96 dec_target_reflCoefs_[i] = refCs[i];
97 }
98 } else {
99 for (size_t i = 0; i < (dec_order_); i++) {
100 refCs[i] = (sid[i + 1] - 127) * (1 << 8); /* Q7 to Q15. */
101 dec_target_reflCoefs_[i] = refCs[i];
102 }
103 }
104
105 for (size_t i = (dec_order_); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
106 refCs[i] = 0;
107 dec_target_reflCoefs_[i] = refCs[i];
108 }
109 }
110
Generate(rtc::ArrayView<int16_t> out_data,bool new_period)111 bool ComfortNoiseDecoder::Generate(rtc::ArrayView<int16_t> out_data,
112 bool new_period) {
113 int16_t excitation[kCngMaxOutsizeOrder];
114 int16_t low[kCngMaxOutsizeOrder];
115 int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1];
116 int16_t ReflBetaStd = 26214; /* 0.8 in q15. */
117 int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. */
118 int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */
119 int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */
120 int16_t Beta, BetaC; /* These are in Q15. */
121 int32_t targetEnergy;
122 int16_t En;
123 int16_t temp16;
124 const size_t num_samples = out_data.size();
125
126 if (num_samples > kCngMaxOutsizeOrder) {
127 return false;
128 }
129
130 if (new_period) {
131 dec_used_scale_factor_ = dec_target_scale_factor_;
132 Beta = ReflBetaNewP;
133 BetaC = ReflBetaCompNewP;
134 } else {
135 Beta = ReflBetaStd;
136 BetaC = ReflBetaCompStd;
137 }
138
139 /* Calculate new scale factor in Q13 */
140 dec_used_scale_factor_ = rtc::checked_cast<int16_t>(
141 WEBRTC_SPL_MUL_16_16_RSFT(dec_used_scale_factor_, Beta >> 2, 13) +
142 WEBRTC_SPL_MUL_16_16_RSFT(dec_target_scale_factor_, BetaC >> 2, 13));
143
144 dec_used_energy_ = dec_used_energy_ >> 1;
145 dec_used_energy_ += dec_target_energy_ >> 1;
146
147 /* Do the same for the reflection coeffs, albeit in Q15. */
148 for (size_t i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) {
149 dec_used_reflCoefs_[i] =
150 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i], Beta, 15);
151 dec_used_reflCoefs_[i] +=
152 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_target_reflCoefs_[i], BetaC, 15);
153 }
154
155 /* Compute the polynomial coefficients. */
156 WebRtcCng_K2a16(dec_used_reflCoefs_, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly);
157
158 targetEnergy = dec_used_energy_;
159
160 /* Calculate scaling factor based on filter energy. */
161 En = 8192; /* 1.0 in Q13. */
162 for (size_t i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) {
163 /* Floating point value for reference.
164 E *= 1.0 - (dec_used_reflCoefs_[i] / 32768.0) *
165 (dec_used_reflCoefs_[i] / 32768.0);
166 */
167
168 /* Same in fixed point. */
169 /* K(i).^2 in Q15. */
170 temp16 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(dec_used_reflCoefs_[i],
171 dec_used_reflCoefs_[i], 15);
172 /* 1 - K(i).^2 in Q15. */
173 temp16 = 0x7fff - temp16;
174 En = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15);
175 }
176
177 /* float scaling= sqrt(E * dec_target_energy_ / (1 << 24)); */
178
179 /* Calculate sqrt(En * target_energy / excitation energy) */
180 targetEnergy = WebRtcSpl_Sqrt(dec_used_energy_);
181
182 En = (int16_t)WebRtcSpl_Sqrt(En) << 6;
183 En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */
184 dec_used_scale_factor_ = (int16_t)((En * targetEnergy) >> 12);
185
186 /* Generate excitation. */
187 /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */
188 for (size_t i = 0; i < num_samples; i++) {
189 excitation[i] = WebRtcSpl_RandN(&dec_seed_) >> 1;
190 }
191
192 /* Scale to correct energy. */
193 WebRtcSpl_ScaleVector(excitation, excitation, dec_used_scale_factor_,
194 num_samples, 13);
195
196 /* `lpPoly` - Coefficients in Q12.
197 * `excitation` - Speech samples.
198 * `nst->dec_filtstate` - State preservation.
199 * `out_data` - Filtered speech samples. */
200 WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation,
201 num_samples, dec_filtstate_, WEBRTC_CNG_MAX_LPC_ORDER,
202 dec_filtstateLow_, WEBRTC_CNG_MAX_LPC_ORDER,
203 out_data.data(), low, num_samples);
204
205 return true;
206 }
207
ComfortNoiseEncoder(int fs,int interval,int quality)208 ComfortNoiseEncoder::ComfortNoiseEncoder(int fs, int interval, int quality)
209 : enc_nrOfCoefs_(quality),
210 enc_sampfreq_(fs),
211 enc_interval_(interval),
212 enc_msSinceSid_(0),
213 enc_Energy_(0),
214 enc_reflCoefs_{0},
215 enc_corrVector_{0},
216 enc_seed_(7777) /* For debugging only. */ {
217 RTC_CHECK_GT(quality, 0);
218 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
219 }
220
Reset(int fs,int interval,int quality)221 void ComfortNoiseEncoder::Reset(int fs, int interval, int quality) {
222 RTC_CHECK_GT(quality, 0);
223 RTC_CHECK_LE(quality, WEBRTC_CNG_MAX_LPC_ORDER);
224 enc_nrOfCoefs_ = quality;
225 enc_sampfreq_ = fs;
226 enc_interval_ = interval;
227 enc_msSinceSid_ = 0;
228 enc_Energy_ = 0;
229 for (auto& c : enc_reflCoefs_)
230 c = 0;
231 for (auto& c : enc_corrVector_)
232 c = 0;
233 enc_seed_ = 7777; /* For debugging only. */
234 }
235
Encode(rtc::ArrayView<const int16_t> speech,bool force_sid,rtc::Buffer * output)236 size_t ComfortNoiseEncoder::Encode(rtc::ArrayView<const int16_t> speech,
237 bool force_sid,
238 rtc::Buffer* output) {
239 int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
240 int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1];
241 int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1];
242 int16_t hanningW[kCngMaxOutsizeOrder];
243 int16_t ReflBeta = 19661; /* 0.6 in q15. */
244 int16_t ReflBetaComp = 13107; /* 0.4 in q15. */
245 int32_t outEnergy;
246 int outShifts;
247 size_t i;
248 int stab;
249 int acorrScale;
250 size_t index;
251 size_t ind, factor;
252 int32_t* bptr;
253 int32_t blo, bhi;
254 int16_t negate;
255 const int16_t* aptr;
256 int16_t speechBuf[kCngMaxOutsizeOrder];
257
258 const size_t num_samples = speech.size();
259 RTC_CHECK_LE(num_samples, kCngMaxOutsizeOrder);
260
261 for (i = 0; i < num_samples; i++) {
262 speechBuf[i] = speech[i];
263 }
264
265 factor = num_samples;
266
267 /* Calculate energy and a coefficients. */
268 outEnergy = WebRtcSpl_Energy(speechBuf, num_samples, &outShifts);
269 while (outShifts > 0) {
270 /* We can only do 5 shifts without destroying accuracy in
271 * division factor. */
272 if (outShifts > 5) {
273 outEnergy <<= (outShifts - 5);
274 outShifts = 5;
275 } else {
276 factor /= 2;
277 outShifts--;
278 }
279 }
280 outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor);
281
282 if (outEnergy > 1) {
283 /* Create Hanning Window. */
284 WebRtcSpl_GetHanningWindow(hanningW, num_samples / 2);
285 for (i = 0; i < (num_samples / 2); i++)
286 hanningW[num_samples - i - 1] = hanningW[i];
287
288 WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, num_samples,
289 14);
290
291 WebRtcSpl_AutoCorrelation(speechBuf, num_samples, enc_nrOfCoefs_,
292 corrVector, &acorrScale);
293
294 if (*corrVector == 0)
295 *corrVector = WEBRTC_SPL_WORD16_MAX;
296
297 /* Adds the bandwidth expansion. */
298 aptr = WebRtcCng_kCorrWindow;
299 bptr = corrVector;
300
301 /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */
302 for (ind = 0; ind < enc_nrOfCoefs_; ind++) {
303 /* The below code multiplies the 16 b corrWindow values (Q15) with
304 * the 32 b corrvector (Q0) and shifts the result down 15 steps. */
305 negate = *bptr < 0;
306 if (negate)
307 *bptr = -*bptr;
308
309 blo = (int32_t)*aptr * (*bptr & 0xffff);
310 bhi = ((blo >> 16) & 0xffff) +
311 ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff));
312 blo = (blo & 0xffff) | ((bhi & 0xffff) << 16);
313
314 *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t)blo >> 15);
315 if (negate)
316 *bptr = -*bptr;
317 bptr++;
318 }
319 /* End of bandwidth expansion. */
320
321 stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs, enc_nrOfCoefs_);
322
323 if (!stab) {
324 /* Disregard from this frame */
325 return 0;
326 }
327
328 } else {
329 for (i = 0; i < enc_nrOfCoefs_; i++)
330 refCs[i] = 0;
331 }
332
333 if (force_sid) {
334 /* Read instantaneous values instead of averaged. */
335 for (i = 0; i < enc_nrOfCoefs_; i++)
336 enc_reflCoefs_[i] = refCs[i];
337 enc_Energy_ = outEnergy;
338 } else {
339 /* Average history with new values. */
340 for (i = 0; i < enc_nrOfCoefs_; i++) {
341 enc_reflCoefs_[i] =
342 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(enc_reflCoefs_[i], ReflBeta, 15);
343 enc_reflCoefs_[i] +=
344 (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(refCs[i], ReflBetaComp, 15);
345 }
346 enc_Energy_ = (outEnergy >> 2) + (enc_Energy_ >> 1) + (enc_Energy_ >> 2);
347 }
348
349 if (enc_Energy_ < 1) {
350 enc_Energy_ = 1;
351 }
352
353 if ((enc_msSinceSid_ > (enc_interval_ - 1)) || force_sid) {
354 /* Search for best dbov value. */
355 index = 0;
356 for (i = 1; i < 93; i++) {
357 /* Always round downwards. */
358 if ((enc_Energy_ - WebRtcCng_kDbov[i]) > 0) {
359 index = i;
360 break;
361 }
362 }
363 if ((i == 93) && (index == 0))
364 index = 94;
365
366 const size_t output_coefs = enc_nrOfCoefs_ + 1;
367 output->AppendData(output_coefs, [&](rtc::ArrayView<uint8_t> output) {
368 output[0] = (uint8_t)index;
369
370 /* Quantize coefficients with tweak for WebRtc implementation of
371 * RFC3389. */
372 if (enc_nrOfCoefs_ == WEBRTC_CNG_MAX_LPC_ORDER) {
373 for (i = 0; i < enc_nrOfCoefs_; i++) {
374 /* Q15 to Q7 with rounding. */
375 output[i + 1] = ((enc_reflCoefs_[i] + 128) >> 8);
376 }
377 } else {
378 for (i = 0; i < enc_nrOfCoefs_; i++) {
379 /* Q15 to Q7 with rounding. */
380 output[i + 1] = (127 + ((enc_reflCoefs_[i] + 128) >> 8));
381 }
382 }
383
384 return output_coefs;
385 });
386
387 enc_msSinceSid_ =
388 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
389 return output_coefs;
390 } else {
391 enc_msSinceSid_ +=
392 static_cast<int16_t>((1000 * num_samples) / enc_sampfreq_);
393 return 0;
394 }
395 }
396
namespace {

/* Converts reflection coefficients to LPC polynomial coefficients using the
 * step-up recursion.
 *
 * `k`        - Reflection coefficients in Q15; k[0..useOrder-1] are read.
 * `useOrder` - Number of coefficients to convert.
 * `a`        - Output polynomial in Q12; a[0..useOrder] are written and
 *              a[0] is always 4096 (1.0 in Q12). */
void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) {
  /* Scratch copy of the polynomial for the order being built. */
  int16_t next[WEBRTC_SPL_MAX_LPC_ORDER + 1];

  a[0] = 4096; /* i.e., (Word16_MAX >> 3) + 1 */
  next[0] = a[0];
  /* Q15 to Q12 with rounding. */
  a[1] = static_cast<int16_t>((k[0] + 4) >> 3);

  for (int order = 1; order < useOrder; ++order) {
    const int16_t refl = k[order];

    /* The new highest-order term is the reflection coefficient itself. */
    next[order + 1] = static_cast<int16_t>((refl + 4) >> 3);

    /* Each lower term gets the mirrored term, weighted by `refl` and
     * rounded from Q27 back to Q12, added to it. */
    for (int j = 1; j <= order; ++j) {
      const int16_t mirrored = static_cast<int16_t>(
          ((static_cast<int32_t>(a[order + 1 - j]) *
            static_cast<int32_t>(refl)) +
           16384) >>
          15);
      next[j] = static_cast<int16_t>(a[j] + mirrored);
    }

    /* Publish the polynomial of this order, terms 0..order+1. */
    std::copy_n(next, order + 2, a);
  }
}

}  // namespace
435
436 } // namespace webrtc
437