/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "define.h"
32 #include "API.h"
33 #include "control.h"
34 #include "typedef.h"
35 #include "stack_alloc.h"
36 #include "structs.h"
37 #include "tuning_parameters.h"
38 #ifdef FIXED_POINT
39 #include "main_FIX.h"
40 #else
41 #include "main_FLP.h"
42 #endif
43
44 /***************************************/
45 /* Read control structure from encoder */
46 /***************************************/
47 static opus_int silk_QueryEncoder( /* O Returns error code */
48 const void *encState, /* I State */
49 silk_EncControlStruct *encStatus /* O Encoder Status */
50 );
51
52 /****************************************/
53 /* Encoder functions */
54 /****************************************/
55
silk_Get_Encoder_Size(opus_int * encSizeBytes)56 opus_int silk_Get_Encoder_Size( /* O Returns error code */
57 opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */
58 )
59 {
60 opus_int ret = SILK_NO_ERROR;
61
62 *encSizeBytes = sizeof( silk_encoder );
63
64 return ret;
65 }
66
67 /*************************/
68 /* Init or Reset encoder */
69 /*************************/
silk_InitEncoder(void * encState,int arch,silk_EncControlStruct * encStatus)70 opus_int silk_InitEncoder( /* O Returns error code */
71 void *encState, /* I/O State */
72 int arch, /* I Run-time architecture */
73 silk_EncControlStruct *encStatus /* O Encoder Status */
74 )
75 {
76 silk_encoder *psEnc;
77 opus_int n, ret = SILK_NO_ERROR;
78
79 psEnc = (silk_encoder *)encState;
80
81 /* Reset encoder */
82 silk_memset( psEnc, 0, sizeof( silk_encoder ) );
83 for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) {
84 if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
85 celt_assert( 0 );
86 }
87 }
88
89 psEnc->nChannelsAPI = 1;
90 psEnc->nChannelsInternal = 1;
91
92 /* Read control structure */
93 if( ret += silk_QueryEncoder( encState, encStatus ) ) {
94 celt_assert( 0 );
95 }
96
97 return ret;
98 }
99
100 /***************************************/
101 /* Read control structure from encoder */
102 /***************************************/
silk_QueryEncoder(const void * encState,silk_EncControlStruct * encStatus)103 static opus_int silk_QueryEncoder( /* O Returns error code */
104 const void *encState, /* I State */
105 silk_EncControlStruct *encStatus /* O Encoder Status */
106 )
107 {
108 opus_int ret = SILK_NO_ERROR;
109 silk_encoder_state_Fxx *state_Fxx;
110 silk_encoder *psEnc = (silk_encoder *)encState;
111
112 state_Fxx = psEnc->state_Fxx;
113
114 encStatus->nChannelsAPI = psEnc->nChannelsAPI;
115 encStatus->nChannelsInternal = psEnc->nChannelsInternal;
116 encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz;
117 encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz;
118 encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz;
119 encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz;
120 encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms;
121 encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps;
122 encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc;
123 encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity;
124 encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC;
125 encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX;
126 encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR;
127 encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
128 encStatus->allowBandwidthSwitch = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch;
129 encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0;
130
131 return ret;
132 }
133
134
135 /**************************/
136 /* Encode frame with Silk */
137 /**************************/
138 /* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */
139 /* encControl->payloadSize_ms is set to */
silk_Encode(void * encState,silk_EncControlStruct * encControl,const opus_int16 * samplesIn,opus_int nSamplesIn,ec_enc * psRangeEnc,opus_int32 * nBytesOut,const opus_int prefillFlag,opus_int activity)140 opus_int silk_Encode( /* O Returns error code */
141 void *encState, /* I/O State */
142 silk_EncControlStruct *encControl, /* I Control status */
143 const opus_int16 *samplesIn, /* I Speech sample input vector */
144 opus_int nSamplesIn, /* I Number of samples in input vector */
145 ec_enc *psRangeEnc, /* I/O Compressor data structure */
146 opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
147 const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */
148 opus_int activity /* I Decision of Opus voice activity detector */
149 )
150 {
151 opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
152 opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
153 opus_int nSamplesFromInput = 0, nSamplesFromInputMax;
154 opus_int speech_act_thr_for_switch_Q8;
155 opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
156 silk_encoder *psEnc = ( silk_encoder * )encState;
157 VARDECL( opus_int16, buf );
158 opus_int transition, curr_block, tot_blocks;
159 SAVE_STACK;
160
161 if (encControl->reducedDependency)
162 {
163 psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
164 psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
165 }
166 psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
167
168 /* Check values in encoder control structure */
169 if( ( ret = check_control_input( encControl ) ) != 0 ) {
170 celt_assert( 0 );
171 RESTORE_STACK;
172 return ret;
173 }
174
175 encControl->switchReady = 0;
176
177 if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
178 /* Mono -> Stereo transition: init state of second channel and stereo state */
179 ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
180 silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
181 silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
182 psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
183 psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
184 psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
185 psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
186 psEnc->sStereo.width_prev_Q14 = 0;
187 psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
188 if( psEnc->nChannelsAPI == 2 ) {
189 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) );
190 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) );
191 }
192 }
193
194 transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal);
195
196 psEnc->nChannelsAPI = encControl->nChannelsAPI;
197 psEnc->nChannelsInternal = encControl->nChannelsInternal;
198
199 nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
200 tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
201 curr_block = 0;
202 if( prefillFlag ) {
203 silk_LP_state save_LP;
204 /* Only accept input length of 10 ms */
205 if( nBlocksOf10ms != 1 ) {
206 celt_assert( 0 );
207 RESTORE_STACK;
208 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
209 }
210 if ( prefillFlag == 2 ) {
211 save_LP = psEnc->state_Fxx[ 0 ].sCmn.sLP;
212 /* Save the sampling rate so the bandwidth switching code can keep handling transitions. */
213 save_LP.saved_fs_kHz = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
214 }
215 /* Reset Encoder */
216 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
217 ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
218 /* Restore the variable LP state. */
219 if ( prefillFlag == 2 ) {
220 psEnc->state_Fxx[ n ].sCmn.sLP = save_LP;
221 }
222 celt_assert( !ret );
223 }
224 tmp_payloadSize_ms = encControl->payloadSize_ms;
225 encControl->payloadSize_ms = 10;
226 tmp_complexity = encControl->complexity;
227 encControl->complexity = 0;
228 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
229 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
230 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
231 }
232 } else {
233 /* Only accept input lengths that are a multiple of 10 ms */
234 if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
235 celt_assert( 0 );
236 RESTORE_STACK;
237 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
238 }
239 /* Make sure no more than one packet can be produced */
240 if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
241 celt_assert( 0 );
242 RESTORE_STACK;
243 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
244 }
245 }
246
247 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
248 /* Force the side channel to the same rate as the mid */
249 opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
250 if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
251 silk_assert( 0 );
252 RESTORE_STACK;
253 return ret;
254 }
255 if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
256 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
257 psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
258 }
259 }
260 psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
261 }
262 celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
263
264 /* Input buffering/resampling and encoding */
265 nSamplesToBufferMax =
266 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
267 nSamplesFromInputMax =
268 silk_DIV32_16( nSamplesToBufferMax *
269 psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,
270 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
271 ALLOC( buf, nSamplesFromInputMax, opus_int16 );
272 while( 1 ) {
273 int curr_nBitsUsedLBRR = 0;
274 nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
275 nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax );
276 nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
277 /* Resample and write to buffer */
278 if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
279 opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
280 for( n = 0; n < nSamplesFromInput; n++ ) {
281 buf[ n ] = samplesIn[ 2 * n ];
282 }
283 /* Making sure to start both resamplers from the same state when switching from mono to stereo */
284 if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
285 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
286 }
287
288 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
289 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
290 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
291
292 nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
293 nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
294 for( n = 0; n < nSamplesFromInput; n++ ) {
295 buf[ n ] = samplesIn[ 2 * n + 1 ];
296 }
297 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
298 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
299
300 psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
301 } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
302 /* Combine left and right channels before resampling */
303 for( n = 0; n < nSamplesFromInput; n++ ) {
304 sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
305 buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
306 }
307 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
308 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
309 /* On the first mono frame, average the results for the two resampler states */
310 if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) {
311 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
312 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
313 for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) {
314 psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] =
315 silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
316 + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
317 }
318 }
319 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
320 } else {
321 celt_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
322 silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
323 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
324 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
325 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
326 }
327
328 samplesIn += nSamplesFromInput * encControl->nChannelsAPI;
329 nSamplesIn -= nSamplesFromInput;
330
331 /* Default */
332 psEnc->allowBandwidthSwitch = 0;
333
334 /* Silk encoder */
335 if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) {
336 /* Enough data in input buffer, so encode */
337 celt_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
338 celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );
339
340 /* Deal with LBRR data */
341 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) {
342 /* Create space at start of payload for VAD and FEC flags */
343 opus_uint8 iCDF[ 2 ] = { 0, 0 };
344 iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
345 ec_enc_icdf( psRangeEnc, 0, iCDF, 8 );
346 curr_nBitsUsedLBRR = ec_tell( psRangeEnc );
347
348 /* Encode any LBRR data from previous packet */
349 /* Encode LBRR flags */
350 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
351 LBRR_symbol = 0;
352 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
353 LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i );
354 }
355 psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0;
356 if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) {
357 ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 );
358 }
359 }
360
361 /* Code LBRR indices and excitation signals */
362 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
363 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
364 if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) {
365 opus_int condCoding;
366
367 if( encControl->nChannelsInternal == 2 && n == 0 ) {
368 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] );
369 /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
370 if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) {
371 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] );
372 }
373 }
374 /* Use conditional coding if previous frame available */
375 if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) {
376 condCoding = CODE_CONDITIONALLY;
377 } else {
378 condCoding = CODE_INDEPENDENTLY;
379 }
380 silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding );
381 silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType,
382 psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length );
383 }
384 }
385 }
386
387 /* Reset LBRR flags */
388 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
389 silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) );
390 }
391 curr_nBitsUsedLBRR = ec_tell( psRangeEnc ) - curr_nBitsUsedLBRR;
392 }
393
394 silk_HP_variable_cutoff( psEnc->state_Fxx );
395
396 /* Total target bits for packet */
397 nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
398 /* Subtract bits used for LBRR */
399 if( !prefillFlag ) {
400 /* psEnc->nBitsUsedLBRR is an exponential moving average of the LBRR usage,
401 except that for the first LBRR frame it does no averaging and for the first
402 frame after after LBRR, it goes back to zero immediately. */
403 if ( curr_nBitsUsedLBRR < 10 ) {
404 psEnc->nBitsUsedLBRR = 0;
405 } else if ( psEnc->nBitsUsedLBRR < 10) {
406 psEnc->nBitsUsedLBRR = curr_nBitsUsedLBRR;
407 } else {
408 psEnc->nBitsUsedLBRR = ( psEnc->nBitsUsedLBRR + curr_nBitsUsedLBRR ) / 2;
409 }
410 nBits -= psEnc->nBitsUsedLBRR;
411 }
412 /* Divide by number of uncoded frames left in packet */
413 nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket );
414 /* Convert to bits/second */
415 if( encControl->payloadSize_ms == 10 ) {
416 TargetRate_bps = silk_SMULBB( nBits, 100 );
417 } else {
418 TargetRate_bps = silk_SMULBB( nBits, 50 );
419 }
420 /* Subtract fraction of bits in excess of target in previous frames and packets */
421 TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
422 if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) {
423 /* Compare actual vs target bits so far in this packet */
424 opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
425 TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
426 }
427 /* Never exceed input bitrate */
428 TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 );
429
430 /* Convert Left/Right to Mid/Side */
431 if( encControl->nChannelsInternal == 2 ) {
432 silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
433 psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
434 MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
435 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
436 if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
437 /* Reset side channel encoder memory for first frame with side coding */
438 if( psEnc->prev_decode_only_middle == 1 ) {
439 silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
440 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
441 silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
442 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
443 psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
444 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
445 psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
446 psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
447 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536;
448 psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
449 }
450 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ], activity );
451 } else {
452 psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
453 }
454 if( !prefillFlag ) {
455 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
456 if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
457 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
458 }
459 }
460 } else {
461 /* Buffering */
462 silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
463 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
464 }
465 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ], activity );
466
467 /* Encode */
468 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
469 opus_int maxBits, useCBR;
470
471 /* Handling rate constraints */
472 maxBits = encControl->maxBits;
473 if( tot_blocks == 2 && curr_block == 0 ) {
474 maxBits = maxBits * 3 / 5;
475 } else if( tot_blocks == 3 ) {
476 if( curr_block == 0 ) {
477 maxBits = maxBits * 2 / 5;
478 } else if( curr_block == 1 ) {
479 maxBits = maxBits * 3 / 4;
480 }
481 }
482 useCBR = encControl->useCBR && curr_block == tot_blocks - 1;
483
484 if( encControl->nChannelsInternal == 1 ) {
485 channelRate_bps = TargetRate_bps;
486 } else {
487 channelRate_bps = MStargetRates_bps[ n ];
488 if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) {
489 useCBR = 0;
490 /* Give mid up to 1/2 of the max bits for that frame */
491 maxBits -= encControl->maxBits / ( tot_blocks * 2 );
492 }
493 }
494
495 if( channelRate_bps > 0 ) {
496 opus_int condCoding;
497
498 silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps );
499
500 /* Use independent coding if no previous frame available */
501 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) {
502 condCoding = CODE_INDEPENDENTLY;
503 } else if( n > 0 && psEnc->prev_decode_only_middle ) {
504 /* If we skipped a side frame in this packet, we don't
505 need LTP scaling; the LTP state is well-defined. */
506 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
507 } else {
508 condCoding = CODE_CONDITIONALLY;
509 }
510 if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) {
511 silk_assert( 0 );
512 }
513 }
514 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
515 psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
516 psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
517 }
518 psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];
519
520 /* Insert VAD and FEC flags at beginning of bitstream */
521 if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) {
522 flags = 0;
523 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
524 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
525 flags = silk_LSHIFT( flags, 1 );
526 flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ];
527 }
528 flags = silk_LSHIFT( flags, 1 );
529 flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
530 }
531 if( !prefillFlag ) {
532 ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
533 }
534
535 /* Return zero bytes if all channels DTXed */
536 if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
537 *nBytesOut = 0;
538 }
539
540 psEnc->nBitsExceeded += *nBytesOut * 8;
541 psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
542 psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 );
543
544 /* Update flag indicating if bandwidth switching is allowed */
545 speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ),
546 SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms );
547 if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) {
548 psEnc->allowBandwidthSwitch = 1;
549 psEnc->timeSinceSwitchAllowed_ms = 0;
550 } else {
551 psEnc->allowBandwidthSwitch = 0;
552 psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms;
553 }
554 }
555
556 if( nSamplesIn == 0 ) {
557 break;
558 }
559 } else {
560 break;
561 }
562 curr_block++;
563 }
564
565 psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
566
567 encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
568 encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
569 encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
570 encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14;
571 if( prefillFlag ) {
572 encControl->payloadSize_ms = tmp_payloadSize_ms;
573 encControl->complexity = tmp_complexity;
574 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
575 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
576 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0;
577 }
578 }
579
580 encControl->signalType = psEnc->state_Fxx[0].sCmn.indices.signalType;
581 encControl->offset = silk_Quantization_Offsets_Q10
582 [ psEnc->state_Fxx[0].sCmn.indices.signalType >> 1 ]
583 [ psEnc->state_Fxx[0].sCmn.indices.quantOffsetType ];
584 RESTORE_STACK;
585 return ret;
586 }
587
588