1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include <stdlib.h>
33 #include "main_FLP.h"
34 #include "tuning_parameters.h"
35
36 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
37 static OPUS_INLINE void silk_LBRR_encode_FLP(
38 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
39 silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
40 const silk_float xfw[], /* I Input signal */
41 opus_int condCoding /* I The type of conditional coding used so far for this frame */
42 );
43
/* Run voice activity detection on the buffered input and translate the result into the VAD and  */
/* DTX state of the common encoder state. Fields written in psEnc->sCmn: indices.signalType,      */
/* noSpeechCounter, inDTX and VAD_flags[ nFramesEncoded ].                                         */
void silk_encode_do_VAD_FLP(
    silk_encoder_state_FLP          *psEnc                              /* I/O  Encoder state FLP                           */
)
{
    /****************************/
    /* Voice Activity Detection */
    /****************************/
    silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch );

    /**************************************************/
    /* Convert speech activity into VAD and DTX flags */
    /**************************************************/
    if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
        /* Activity below the DTX threshold: mark the frame as inactive and count it */
        psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
        psEnc->sCmn.noSpeechCounter++;
        if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) {
            /* Too few consecutive inactive frames so far */
            psEnc->sCmn.inDTX = 0;
        } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) {
            /* The inactive run exceeded the limit: rewind the counter and clear the DTX flag */
            psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX;
            psEnc->sCmn.inDTX           = 0;
        }
        /* For counter values between the two bounds, inDTX is deliberately left as it was */
        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0;
    } else {
        /* Active frame: reset the inactivity bookkeeping; the signal type starts out as unvoiced */
        psEnc->sCmn.noSpeechCounter    = 0;
        psEnc->sCmn.inDTX              = 0;
        psEnc->sCmn.indices.signalType = TYPE_UNVOICED;
        psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
    }
}
73
74 /****************/
75 /* Encode frame */
76 /****************/
silk_encode_frame_FLP(silk_encoder_state_FLP * psEnc,opus_int32 * pnBytesOut,ec_enc * psRangeEnc,opus_int condCoding,opus_int maxBits,opus_int useCBR)77 opus_int silk_encode_frame_FLP(
78 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
79 opus_int32 *pnBytesOut, /* O Number of payload bytes; */
80 ec_enc *psRangeEnc, /* I/O compressor data structure */
81 opus_int condCoding, /* I The type of conditional coding to use */
82 opus_int maxBits, /* I If > 0: maximum number of output bits */
83 opus_int useCBR /* I Flag to force constant-bitrate operation */
84 )
85 {
86 silk_encoder_control_FLP sEncCtrl;
87 opus_int i, iter, maxIter, found_upper, found_lower, ret = 0;
88 silk_float *x_frame, *res_pitch_frame;
89 silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ];
90 ec_enc sRangeEnc_copy, sRangeEnc_copy2;
91 silk_nsq_state sNSQ_copy, sNSQ_copy2;
92 opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper;
93 opus_int32 gainsID, gainsID_lower, gainsID_upper;
94 opus_int16 gainMult_Q8;
95 opus_int16 ec_prevLagIndex_copy;
96 opus_int ec_prevSignalType_copy;
97 opus_int8 LastGainIndex_copy2;
98 opus_int32 pGains_Q16[ MAX_NB_SUBFR ];
99 opus_uint8 ec_buf_copy[ 1275 ];
100 opus_int gain_lock[ MAX_NB_SUBFR ] = {0};
101 opus_int16 best_gain_mult[ MAX_NB_SUBFR ];
102 opus_int best_sum[ MAX_NB_SUBFR ];
103
104 /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */
105 LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0;
106
107 psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3;
108
109 /**************************************************************/
110 /* Set up Input Pointers, and insert frame in input buffer */
111 /**************************************************************/
112 /* pointers aligned with start of frame to encode */
113 x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */
114 res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */
115
116 /***************************************/
117 /* Ensure smooth bandwidth transitions */
118 /***************************************/
119 silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
120
121 /*******************************************/
122 /* Copy new frame to front of input buffer */
123 /*******************************************/
124 silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length );
125
126 /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */
127 for( i = 0; i < 8; i++ ) {
128 x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f;
129 }
130
131 if( !psEnc->sCmn.prefillFlag ) {
132 /*****************************************/
133 /* Find pitch lags, initial LPC analysis */
134 /*****************************************/
135 silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch );
136
137 /************************/
138 /* Noise shape analysis */
139 /************************/
140 silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame );
141
142 /***************************************************/
143 /* Find linear prediction coefficients (LPC + LTP) */
144 /***************************************************/
145 silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame, condCoding );
146
147 /****************************************/
148 /* Process gains */
149 /****************************************/
150 silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding );
151
152 /****************************************/
153 /* Low Bitrate Redundant Encoding */
154 /****************************************/
155 silk_LBRR_encode_FLP( psEnc, &sEncCtrl, x_frame, condCoding );
156
157 /* Loop over quantizer and entroy coding to control bitrate */
158 maxIter = 6;
159 gainMult_Q8 = SILK_FIX_CONST( 1, 8 );
160 found_lower = 0;
161 found_upper = 0;
162 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
163 gainsID_lower = -1;
164 gainsID_upper = -1;
165 /* Copy part of the input state */
166 silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) );
167 silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
168 seed_copy = psEnc->sCmn.indices.Seed;
169 ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex;
170 ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType;
171 for( iter = 0; ; iter++ ) {
172 if( gainsID == gainsID_lower ) {
173 nBits = nBits_lower;
174 } else if( gainsID == gainsID_upper ) {
175 nBits = nBits_upper;
176 } else {
177 /* Restore part of the input state */
178 if( iter > 0 ) {
179 silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) );
180 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) );
181 psEnc->sCmn.indices.Seed = seed_copy;
182 psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
183 psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
184 }
185
186 /*****************************************/
187 /* Noise shaping quantization */
188 /*****************************************/
189 silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, x_frame );
190
191 if ( iter == maxIter && !found_lower ) {
192 silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
193 }
194
195 /****************************************/
196 /* Encode Parameters */
197 /****************************************/
198 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
199
200 /****************************************/
201 /* Encode Excitation Signal */
202 /****************************************/
203 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
204 psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
205
206 nBits = ec_tell( psRangeEnc );
207
208 /* If we still bust after the last iteration, do some damage control. */
209 if ( iter == maxIter && !found_lower && nBits > maxBits ) {
210 silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
211
212 /* Keep gains the same as the last frame. */
213 psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
214 for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
215 psEnc->sCmn.indices.GainsIndices[ i ] = 4;
216 }
217 if (condCoding != CODE_CONDITIONALLY) {
218 psEnc->sCmn.indices.GainsIndices[ 0 ] = sEncCtrl.lastGainIndexPrev;
219 }
220 psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy;
221 psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy;
222 /* Clear all pulses. */
223 for ( i = 0; i < psEnc->sCmn.frame_length; i++ ) {
224 psEnc->sCmn.pulses[ i ] = 0;
225 }
226
227 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding );
228
229 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType,
230 psEnc->sCmn.pulses, psEnc->sCmn.frame_length );
231
232 nBits = ec_tell( psRangeEnc );
233 }
234
235 if( useCBR == 0 && iter == 0 && nBits <= maxBits ) {
236 break;
237 }
238 }
239
240 if( iter == maxIter ) {
241 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) {
242 /* Restore output state from earlier iteration that did meet the bitrate budget */
243 silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) );
244 silk_assert( sRangeEnc_copy2.offs <= 1275 );
245 silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs );
246 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) );
247 psEnc->sShape.LastGainIndex = LastGainIndex_copy2;
248 }
249 break;
250 }
251
252 if( nBits > maxBits ) {
253 if( found_lower == 0 && iter >= 2 ) {
254 /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */
255 sEncCtrl.Lambda = silk_max_float(sEncCtrl.Lambda*1.5f, 1.5f);
256 /* Reducing dithering can help us hit the target. */
257 psEnc->sCmn.indices.quantOffsetType = 0;
258 found_upper = 0;
259 gainsID_upper = -1;
260 } else {
261 found_upper = 1;
262 nBits_upper = nBits;
263 gainMult_upper = gainMult_Q8;
264 gainsID_upper = gainsID;
265 }
266 } else if( nBits < maxBits - 5 ) {
267 found_lower = 1;
268 nBits_lower = nBits;
269 gainMult_lower = gainMult_Q8;
270 if( gainsID != gainsID_lower ) {
271 gainsID_lower = gainsID;
272 /* Copy part of the output state */
273 silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) );
274 silk_assert( psRangeEnc->offs <= 1275 );
275 silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs );
276 silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
277 LastGainIndex_copy2 = psEnc->sShape.LastGainIndex;
278 }
279 } else {
280 /* Within 5 bits of budget: close enough */
281 break;
282 }
283
284 if ( !found_lower && nBits > maxBits ) {
285 int j;
286 for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
287 int sum=0;
288 for ( j = i*psEnc->sCmn.subfr_length; j < (i+1)*psEnc->sCmn.subfr_length; j++ ) {
289 sum += abs( psEnc->sCmn.pulses[j] );
290 }
291 if ( iter == 0 || (sum < best_sum[i] && !gain_lock[i]) ) {
292 best_sum[i] = sum;
293 best_gain_mult[i] = gainMult_Q8;
294 } else {
295 gain_lock[i] = 1;
296 }
297 }
298 }
299 if( ( found_lower & found_upper ) == 0 ) {
300 /* Adjust gain according to high-rate rate/distortion curve */
301 if( nBits > maxBits ) {
302 if (gainMult_Q8 < 16384) {
303 gainMult_Q8 *= 2;
304 } else {
305 gainMult_Q8 = 32767;
306 }
307 } else {
308 opus_int32 gain_factor_Q16;
309 gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) );
310 gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 );
311 }
312 } else {
313 /* Adjust gain by interpolating */
314 gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower );
315 /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */
316 if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) {
317 gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 );
318 } else
319 if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) {
320 gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 );
321 }
322 }
323
324 for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
325 opus_int16 tmp;
326 if ( gain_lock[i] ) {
327 tmp = best_gain_mult[i];
328 } else {
329 tmp = gainMult_Q8;
330 }
331 pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], tmp ), 8 );
332 }
333
334 /* Quantize gains */
335 psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev;
336 silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16,
337 &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
338
339 /* Unique identifier of gains vector */
340 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr );
341
342 /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
343 for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) {
344 sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f;
345 }
346 }
347 }
348
349 /* Update input buffer */
350 silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ],
351 ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) );
352
353 /* Exit without entropy coding */
354 if( psEnc->sCmn.prefillFlag ) {
355 /* No payload */
356 *pnBytesOut = 0;
357 return ret;
358 }
359
360 /* Parameters needed for next frame */
361 psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ];
362 psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType;
363
364 /****************************************/
365 /* Finalize payload */
366 /****************************************/
367 psEnc->sCmn.first_frame_after_reset = 0;
368 /* Payload size */
369 *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 );
370
371 return ret;
372 }
373
374 /* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */
silk_LBRR_encode_FLP(silk_encoder_state_FLP * psEnc,silk_encoder_control_FLP * psEncCtrl,const silk_float xfw[],opus_int condCoding)375 static OPUS_INLINE void silk_LBRR_encode_FLP(
376 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */
377 silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */
378 const silk_float xfw[], /* I Input signal */
379 opus_int condCoding /* I The type of conditional coding used so far for this frame */
380 )
381 {
382 opus_int k;
383 opus_int32 Gains_Q16[ MAX_NB_SUBFR ];
384 silk_float TempGains[ MAX_NB_SUBFR ];
385 SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ];
386 silk_nsq_state sNSQ_LBRR;
387
388 /*******************************************/
389 /* Control use of inband LBRR */
390 /*******************************************/
391 if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) {
392 psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1;
393
394 /* Copy noise shaping quantizer state and quantization indices from regular encoding */
395 silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) );
396 silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) );
397
398 /* Save original gains */
399 silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
400
401 if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) {
402 /* First frame in packet or previous frame not LBRR coded */
403 psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex;
404
405 /* Increase Gains to get target LBRR rate */
406 psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases;
407 psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 );
408 }
409
410 /* Decode to get gains in sync with decoder */
411 silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices,
412 &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr );
413
414 /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */
415 for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
416 psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f );
417 }
418
419 /*****************************************/
420 /* Noise shaping quantization */
421 /*****************************************/
422 silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR,
423 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw );
424
425 /* Restore original gains */
426 silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) );
427 }
428 }
429