1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "define.h"
32 #include "API.h"
33 #include "control.h"
34 #include "typedef.h"
35 #include "stack_alloc.h"
36 #include "structs.h"
37 #include "tuning_parameters.h"
38 #ifdef FIXED_POINT
39 #include "main_FIX.h"
40 #else
41 #include "main_FLP.h"
42 #endif
43
44 /***************************************/
45 /* Read control structure from encoder */
46 /***************************************/
47 static opus_int silk_QueryEncoder( /* O Returns error code */
48 const void *encState, /* I State */
49 silk_EncControlStruct *encStatus /* O Encoder Status */
50 );
51
52 /****************************************/
53 /* Encoder functions */
54 /****************************************/
55
silk_Get_Encoder_Size(opus_int * encSizeBytes)56 opus_int silk_Get_Encoder_Size( /* O Returns error code */
57 opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */
58 )
59 {
60 opus_int ret = SILK_NO_ERROR;
61
62 *encSizeBytes = sizeof( silk_encoder );
63
64 return ret;
65 }
66
67 /*************************/
68 /* Init or Reset encoder */
69 /*************************/
silk_InitEncoder(void * encState,int arch,silk_EncControlStruct * encStatus)70 opus_int silk_InitEncoder( /* O Returns error code */
71 void *encState, /* I/O State */
72 int arch, /* I Run-time architecture */
73 silk_EncControlStruct *encStatus /* O Encoder Status */
74 )
75 {
76 silk_encoder *psEnc;
77 opus_int n, ret = SILK_NO_ERROR;
78
79 psEnc = (silk_encoder *)encState;
80
81 /* Reset encoder */
82 silk_memset( psEnc, 0, sizeof( silk_encoder ) );
83 for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) {
84 if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
85 silk_assert( 0 );
86 }
87 }
88
89 psEnc->nChannelsAPI = 1;
90 psEnc->nChannelsInternal = 1;
91
92 /* Read control structure */
93 if( ret += silk_QueryEncoder( encState, encStatus ) ) {
94 silk_assert( 0 );
95 }
96
97 return ret;
98 }
99
100 /***************************************/
101 /* Read control structure from encoder */
102 /***************************************/
silk_QueryEncoder(const void * encState,silk_EncControlStruct * encStatus)103 static opus_int silk_QueryEncoder( /* O Returns error code */
104 const void *encState, /* I State */
105 silk_EncControlStruct *encStatus /* O Encoder Status */
106 )
107 {
108 opus_int ret = SILK_NO_ERROR;
109 silk_encoder_state_Fxx *state_Fxx;
110 silk_encoder *psEnc = (silk_encoder *)encState;
111
112 state_Fxx = psEnc->state_Fxx;
113
114 encStatus->nChannelsAPI = psEnc->nChannelsAPI;
115 encStatus->nChannelsInternal = psEnc->nChannelsInternal;
116 encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz;
117 encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz;
118 encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz;
119 encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz;
120 encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms;
121 encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps;
122 encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc;
123 encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity;
124 encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC;
125 encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX;
126 encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR;
127 encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
128 encStatus->allowBandwidthSwitch = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch;
129 encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0;
130
131 return ret;
132 }
133
134
135 /**************************/
136 /* Encode frame with Silk */
137 /**************************/
138 /* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what */
139 /* encControl->payloadSize_ms is set to */
silk_Encode(void * encState,silk_EncControlStruct * encControl,const opus_int16 * samplesIn,opus_int nSamplesIn,ec_enc * psRangeEnc,opus_int32 * nBytesOut,const opus_int prefillFlag)140 opus_int silk_Encode( /* O Returns error code */
141 void *encState, /* I/O State */
142 silk_EncControlStruct *encControl, /* I Control status */
143 const opus_int16 *samplesIn, /* I Speech sample input vector */
144 opus_int nSamplesIn, /* I Number of samples in input vector */
145 ec_enc *psRangeEnc, /* I/O Compressor data structure */
146 opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
147 const opus_int prefillFlag /* I Flag to indicate prefilling buffers no coding */
148 )
149 {
150 opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
151 opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
152 opus_int nSamplesFromInput = 0, nSamplesFromInputMax;
153 opus_int speech_act_thr_for_switch_Q8;
154 opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
155 silk_encoder *psEnc = ( silk_encoder * )encState;
156 VARDECL( opus_int16, buf );
157 opus_int transition, curr_block, tot_blocks;
158 SAVE_STACK;
159
160 if (encControl->reducedDependency)
161 {
162 psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
163 psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
164 }
165 psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
166
167 /* Check values in encoder control structure */
168 if( ( ret = check_control_input( encControl ) ) != 0 ) {
169 silk_assert( 0 );
170 RESTORE_STACK;
171 return ret;
172 }
173
174 encControl->switchReady = 0;
175
176 if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
177 /* Mono -> Stereo transition: init state of second channel and stereo state */
178 ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
179 silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
180 silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
181 psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
182 psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
183 psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
184 psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
185 psEnc->sStereo.width_prev_Q14 = 0;
186 psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
187 if( psEnc->nChannelsAPI == 2 ) {
188 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) );
189 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) );
190 }
191 }
192
193 transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal);
194
195 psEnc->nChannelsAPI = encControl->nChannelsAPI;
196 psEnc->nChannelsInternal = encControl->nChannelsInternal;
197
198 nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
199 tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
200 curr_block = 0;
201 if( prefillFlag ) {
202 /* Only accept input length of 10 ms */
203 if( nBlocksOf10ms != 1 ) {
204 silk_assert( 0 );
205 RESTORE_STACK;
206 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
207 }
208 /* Reset Encoder */
209 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
210 ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
211 silk_assert( !ret );
212 }
213 tmp_payloadSize_ms = encControl->payloadSize_ms;
214 encControl->payloadSize_ms = 10;
215 tmp_complexity = encControl->complexity;
216 encControl->complexity = 0;
217 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
218 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
219 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
220 }
221 } else {
222 /* Only accept input lengths that are a multiple of 10 ms */
223 if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
224 silk_assert( 0 );
225 RESTORE_STACK;
226 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
227 }
228 /* Make sure no more than one packet can be produced */
229 if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
230 silk_assert( 0 );
231 RESTORE_STACK;
232 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
233 }
234 }
235
236 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
237 /* Force the side channel to the same rate as the mid */
238 opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
239 if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
240 silk_assert( 0 );
241 RESTORE_STACK;
242 return ret;
243 }
244 if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
245 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
246 psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
247 }
248 }
249 psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
250 }
251 silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
252
253 /* Input buffering/resampling and encoding */
254 nSamplesToBufferMax =
255 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
256 nSamplesFromInputMax =
257 silk_DIV32_16( nSamplesToBufferMax *
258 psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,
259 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
260 ALLOC( buf, nSamplesFromInputMax, opus_int16 );
261 while( 1 ) {
262 nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
263 nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax );
264 nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
265 /* Resample and write to buffer */
266 if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
267 opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
268 for( n = 0; n < nSamplesFromInput; n++ ) {
269 buf[ n ] = samplesIn[ 2 * n ];
270 }
271 /* Making sure to start both resamplers from the same state when switching from mono to stereo */
272 if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
273 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
274 }
275
276 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
277 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
278 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
279
280 nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
281 nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
282 for( n = 0; n < nSamplesFromInput; n++ ) {
283 buf[ n ] = samplesIn[ 2 * n + 1 ];
284 }
285 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
286 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
287
288 psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
289 } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
290 /* Combine left and right channels before resampling */
291 for( n = 0; n < nSamplesFromInput; n++ ) {
292 sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
293 buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
294 }
295 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
296 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
297 /* On the first mono frame, average the results for the two resampler states */
298 if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) {
299 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
300 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
301 for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) {
302 psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] =
303 silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
304 + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
305 }
306 }
307 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
308 } else {
309 silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
310 silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
311 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
312 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
313 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
314 }
315
316 samplesIn += nSamplesFromInput * encControl->nChannelsAPI;
317 nSamplesIn -= nSamplesFromInput;
318
319 /* Default */
320 psEnc->allowBandwidthSwitch = 0;
321
322 /* Silk encoder */
323 if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) {
324 /* Enough data in input buffer, so encode */
325 silk_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
326 silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );
327
328 /* Deal with LBRR data */
329 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) {
330 /* Create space at start of payload for VAD and FEC flags */
331 opus_uint8 iCDF[ 2 ] = { 0, 0 };
332 iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
333 ec_enc_icdf( psRangeEnc, 0, iCDF, 8 );
334
335 /* Encode any LBRR data from previous packet */
336 /* Encode LBRR flags */
337 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
338 LBRR_symbol = 0;
339 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
340 LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i );
341 }
342 psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0;
343 if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) {
344 ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 );
345 }
346 }
347
348 /* Code LBRR indices and excitation signals */
349 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
350 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
351 if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) {
352 opus_int condCoding;
353
354 if( encControl->nChannelsInternal == 2 && n == 0 ) {
355 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] );
356 /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
357 if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) {
358 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] );
359 }
360 }
361 /* Use conditional coding if previous frame available */
362 if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) {
363 condCoding = CODE_CONDITIONALLY;
364 } else {
365 condCoding = CODE_INDEPENDENTLY;
366 }
367 silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding );
368 silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType,
369 psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length );
370 }
371 }
372 }
373
374 /* Reset LBRR flags */
375 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
376 silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) );
377 }
378
379 psEnc->nBitsUsedLBRR = ec_tell( psRangeEnc );
380 }
381
382 silk_HP_variable_cutoff( psEnc->state_Fxx );
383
384 /* Total target bits for packet */
385 nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
386 /* Subtract bits used for LBRR */
387 if( !prefillFlag ) {
388 nBits -= psEnc->nBitsUsedLBRR;
389 }
390 /* Divide by number of uncoded frames left in packet */
391 nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket );
392 /* Convert to bits/second */
393 if( encControl->payloadSize_ms == 10 ) {
394 TargetRate_bps = silk_SMULBB( nBits, 100 );
395 } else {
396 TargetRate_bps = silk_SMULBB( nBits, 50 );
397 }
398 /* Subtract fraction of bits in excess of target in previous frames and packets */
399 TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
400 if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) {
401 /* Compare actual vs target bits so far in this packet */
402 opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
403 TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
404 }
405 /* Never exceed input bitrate */
406 TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 );
407
408 /* Convert Left/Right to Mid/Side */
409 if( encControl->nChannelsInternal == 2 ) {
410 silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
411 psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
412 MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
413 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
414 if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
415 /* Reset side channel encoder memory for first frame with side coding */
416 if( psEnc->prev_decode_only_middle == 1 ) {
417 silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
418 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
419 silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
420 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
421 psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
422 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
423 psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
424 psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
425 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536;
426 psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
427 }
428 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ] );
429 } else {
430 psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
431 }
432 if( !prefillFlag ) {
433 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
434 if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
435 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
436 }
437 }
438 } else {
439 /* Buffering */
440 silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
441 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
442 }
443 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ] );
444
445 /* Encode */
446 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
447 opus_int maxBits, useCBR;
448
449 /* Handling rate constraints */
450 maxBits = encControl->maxBits;
451 if( tot_blocks == 2 && curr_block == 0 ) {
452 maxBits = maxBits * 3 / 5;
453 } else if( tot_blocks == 3 ) {
454 if( curr_block == 0 ) {
455 maxBits = maxBits * 2 / 5;
456 } else if( curr_block == 1 ) {
457 maxBits = maxBits * 3 / 4;
458 }
459 }
460 useCBR = encControl->useCBR && curr_block == tot_blocks - 1;
461
462 if( encControl->nChannelsInternal == 1 ) {
463 channelRate_bps = TargetRate_bps;
464 } else {
465 channelRate_bps = MStargetRates_bps[ n ];
466 if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) {
467 useCBR = 0;
468 /* Give mid up to 1/2 of the max bits for that frame */
469 maxBits -= encControl->maxBits / ( tot_blocks * 2 );
470 }
471 }
472
473 if( channelRate_bps > 0 ) {
474 opus_int condCoding;
475
476 silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps );
477
478 /* Use independent coding if no previous frame available */
479 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) {
480 condCoding = CODE_INDEPENDENTLY;
481 } else if( n > 0 && psEnc->prev_decode_only_middle ) {
482 /* If we skipped a side frame in this packet, we don't
483 need LTP scaling; the LTP state is well-defined. */
484 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
485 } else {
486 condCoding = CODE_CONDITIONALLY;
487 }
488 if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) {
489 silk_assert( 0 );
490 }
491 }
492 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
493 psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
494 psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
495 }
496 psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];
497
498 /* Insert VAD and FEC flags at beginning of bitstream */
499 if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) {
500 flags = 0;
501 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
502 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
503 flags = silk_LSHIFT( flags, 1 );
504 flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ];
505 }
506 flags = silk_LSHIFT( flags, 1 );
507 flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
508 }
509 if( !prefillFlag ) {
510 ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
511 }
512
513 /* Return zero bytes if all channels DTXed */
514 if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
515 *nBytesOut = 0;
516 }
517
518 psEnc->nBitsExceeded += *nBytesOut * 8;
519 psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
520 psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 );
521
522 /* Update flag indicating if bandwidth switching is allowed */
523 speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ),
524 SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms );
525 if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) {
526 psEnc->allowBandwidthSwitch = 1;
527 psEnc->timeSinceSwitchAllowed_ms = 0;
528 } else {
529 psEnc->allowBandwidthSwitch = 0;
530 psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms;
531 }
532 }
533
534 if( nSamplesIn == 0 ) {
535 break;
536 }
537 } else {
538 break;
539 }
540 curr_block++;
541 }
542
543 psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
544
545 encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
546 encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
547 encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
548 encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14;
549 if( prefillFlag ) {
550 encControl->payloadSize_ms = tmp_payloadSize_ms;
551 encControl->complexity = tmp_complexity;
552 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
553 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
554 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0;
555 }
556 }
557
558 encControl->signalType = psEnc->state_Fxx[0].sCmn.indices.signalType;
559 encControl->offset = silk_Quantization_Offsets_Q10
560 [ psEnc->state_Fxx[0].sCmn.indices.signalType >> 1 ]
561 [ psEnc->state_Fxx[0].sCmn.indices.quantOffsetType ];
562 RESTORE_STACK;
563 return ret;
564 }
565
566