1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "API.h"
32 #include "main.h"
33 #include "stack_alloc.h"
34 #include "os_support.h"
35
36 /************************/
37 /* Decoder Super Struct */
38 /************************/
/* Top-level SILK decoder state: the per-channel decoders plus the stereo   */
/* and channel-count bookkeeping that silk_Decode() carries between calls.  */
typedef struct {
    silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];  /* One decoder state per internal channel                                      */
    stereo_dec_state            sStereo;                                /* Stereo state (pred_prev_Q13, sSide, sMid) used by silk_Decode()              */
    opus_int                    nChannelsAPI;                           /* decControl->nChannelsAPI as last recorded by silk_Decode()                   */
    opus_int                    nChannelsInternal;                      /* decControl->nChannelsInternal as last recorded by silk_Decode(); compared    */
                                                                        /* with the new value to detect mono <-> stereo transitions                     */
    opus_int                    prev_decode_only_middle;                /* decode_only_middle of the last silk_Decode() call that was not a lost packet */
} silk_decoder;
46
47 /*********************/
48 /* Decoder functions */
49 /*********************/
50
silk_Get_Decoder_Size(opus_int * decSizeBytes)51 opus_int silk_Get_Decoder_Size( /* O Returns error code */
52 opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */
53 )
54 {
55 opus_int ret = SILK_NO_ERROR;
56
57 *decSizeBytes = sizeof( silk_decoder );
58
59 return ret;
60 }
61
62 /* Reset decoder state */
silk_InitDecoder(void * decState)63 opus_int silk_InitDecoder( /* O Returns error code */
64 void *decState /* I/O State */
65 )
66 {
67 opus_int n, ret = SILK_NO_ERROR;
68 silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
69
70 for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
71 ret = silk_init_decoder( &channel_state[ n ] );
72 }
73 silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
74 /* Not strictly needed, but it's cleaner that way */
75 ((silk_decoder *)decState)->prev_decode_only_middle = 0;
76
77 return ret;
78 }
79
80 /* Decode a frame */
/* Decodes one SILK frame per call and writes it to samplesOut at the API      */
/* sample rate.                                                                */
/*                                                                             */
/* On the first call for a payload (nFramesDecoded == 0) the per-channel frame */
/* layout and internal sampling rate are configured from decControl, and,      */
/* unless the packet is lost, the VAD/LBRR flags are read from the range       */
/* decoder. For normal decoding any LBRR data present is decoded and           */
/* discarded. The frame is then decoded for each internal channel, converted   */
/* from mid/side to left/right when both API and internal layouts are stereo,  */
/* resampled to decControl->API_sampleRate, and interleaved when               */
/* decControl->nChannelsAPI == 2 (a mono stream is duplicated into both        */
/* output channels).                                                           */
/*                                                                             */
/* *nSamplesOut receives the number of output samples per channel.             */
/* decControl->prevPitchLag is updated before returning.                       */
/*                                                                             */
/* Returns SILK_NO_ERROR on success, SILK_DEC_INVALID_FRAME_SIZE or            */
/* SILK_DEC_INVALID_SAMPLING_FREQUENCY for unsupported control settings, or    */
/* the sum of the error codes returned by the helper routines.                 */
opus_int silk_Decode(                                   /* O    Returns error code                              */
    void*                           decState,           /* I/O  State                                           */
    silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
    opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
    opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
    ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
    opus_int32                      *nSamplesOut,       /* O    Number of samples decoded                       */
    int                             arch                /* I    Run-time architecture                           */
)
{
    opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
    opus_int32 nSamplesOutDec, LBRR_symbol;
    opus_int16 *samplesOut1_tmp[ 2 ];
    VARDECL( opus_int16, samplesOut1_tmp_storage1 );
    VARDECL( opus_int16, samplesOut1_tmp_storage2 );
    VARDECL( opus_int16, samplesOut2_tmp );
    opus_int32 MS_pred_Q13[ 2 ] = { 0 };
    opus_int16 *resample_out_ptr;
    silk_decoder *psDec = ( silk_decoder * )decState;
    silk_decoder_state *channel_state = psDec->channel_state;
    opus_int has_side;
    opus_int stereo_to_mono;
    int delay_stack_alloc;
    SAVE_STACK;

    celt_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );

    /**********************************/
    /* Test if first frame in payload */
    /**********************************/
    if( newPacketFlag ) {
        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
            channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
        }
    }

    /* If Mono -> Stereo transition in bitstream: init state of second channel */
    if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
        ret += silk_init_decoder( &channel_state[ 1 ] );
    }

    /* True when the stream switched from stereo to mono while the internal
       sampling rate stayed the same as that of channel 0 */
    stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
                     ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );

    /* First frame of the payload: configure frame layout and sampling rate */
    if( channel_state[ 0 ].nFramesDecoded == 0 ) {
        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
            opus_int fs_kHz_dec;
            if( decControl->payloadSize_ms == 0 ) {
                /* Assuming packet loss, use 10 ms */
                channel_state[ n ].nFramesPerPacket = 1;
                channel_state[ n ].nb_subfr = 2;
            } else if( decControl->payloadSize_ms == 10 ) {
                channel_state[ n ].nFramesPerPacket = 1;
                channel_state[ n ].nb_subfr = 2;
            } else if( decControl->payloadSize_ms == 20 ) {
                channel_state[ n ].nFramesPerPacket = 1;
                channel_state[ n ].nb_subfr = 4;
            } else if( decControl->payloadSize_ms == 40 ) {
                channel_state[ n ].nFramesPerPacket = 2;
                channel_state[ n ].nb_subfr = 4;
            } else if( decControl->payloadSize_ms == 60 ) {
                channel_state[ n ].nFramesPerPacket = 3;
                channel_state[ n ].nb_subfr = 4;
            } else {
                celt_assert( 0 );
                RESTORE_STACK;
                return SILK_DEC_INVALID_FRAME_SIZE;
            }
            /* Maps 8000 / 12000 / 16000 Hz to 8 / 12 / 16 kHz */
            fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
            if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
                celt_assert( 0 );
                RESTORE_STACK;
                return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
            }
            ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
        }
    }

    /* Entering stereo-in / stereo-out from a state where either side was mono:
       clear the stereo predictor and side history, and start the second
       channel's resampler from a copy of the first channel's resampler state */
    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
        silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
        silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
        silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
    }
    /* Remember the channel configuration for transition detection on the next call */
    psDec->nChannelsAPI      = decControl->nChannelsAPI;
    psDec->nChannelsInternal = decControl->nChannelsInternal;

    if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
        ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
        RESTORE_STACK;
        return( ret );
    }

    if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
        /* First decoder call for this payload */
        /* Decode VAD flags and LBRR flag */
        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
            for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
                channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
            }
            channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
        }
        /* Decode LBRR flags */
        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
            silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
            if( channel_state[ n ].LBRR_flag ) {
                if( channel_state[ n ].nFramesPerPacket == 1 ) {
                    channel_state[ n ].LBRR_flags[ 0 ] = 1;
                } else {
                    /* The decoded symbol plus one is used as a bitmask with one bit per frame */
                    LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
                    for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
                        channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
                    }
                }
            }
        }

        if( lostFlag == FLAG_DECODE_NORMAL ) {
            /* Regular decoding: skip all LBRR data */
            /* The LBRR frames are decoded into a scratch buffer that is never used afterwards */
            for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
                for( n = 0; n < decControl->nChannelsInternal; n++ ) {
                    if( channel_state[ n ].LBRR_flags[ i ] ) {
                        opus_int16 pulses[ MAX_FRAME_LENGTH ];
                        opus_int condCoding;

                        if( decControl->nChannelsInternal == 2 && n == 0 ) {
                            silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
                            if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
                                silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
                            }
                        }
                        /* Use conditional coding if previous frame available */
                        if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
                            condCoding = CODE_CONDITIONALLY;
                        } else {
                            condCoding = CODE_INDEPENDENTLY;
                        }
                        silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
                        silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
                            channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
                    }
                }
            }
        }
    }

    /* Get MS predictor index */
    if( decControl->nChannelsInternal == 2 ) {
        if(   lostFlag == FLAG_DECODE_NORMAL ||
            ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
        {
            silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
            /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
            if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
                ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
            {
                silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
            } else {
                decode_only_middle = 0;
            }
        } else {
            /* No predictor in the bitstream for this frame: reuse the previous one */
            for( n = 0; n < 2; n++ ) {
                MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
            }
        }
    }

    /* Reset side channel decoder prediction memory for first frame with side coding */
    if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
        silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
        silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
        psDec->channel_state[ 1 ].lagPrev        = 100;
        psDec->channel_state[ 1 ].LastGainIndex  = 10;
        psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
        psDec->channel_state[ 1 ].first_frame_after_reset = 1;
    }

    /* Check if the temp buffer fits into the output PCM buffer. If it fits,
       we can delay allocating the temp buffer until after the SILK peak stack
       usage. We need to use a < and not a <= because of the two extra samples. */
    delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
          < decControl->API_sampleRate*decControl->nChannelsAPI;
    ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
           : decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
           opus_int16 );
    if ( delay_stack_alloc )
    {
       /* Use the caller's output buffer as scratch for the decoded samples;
          they are copied to samplesOut1_tmp_storage2 before resampling */
       samplesOut1_tmp[ 0 ] = samplesOut;
       samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
    } else {
       samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
       samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
    }

    /* Decide whether the second (side) channel is decoded for this frame */
    if( lostFlag == FLAG_DECODE_NORMAL ) {
        has_side = !decode_only_middle;
    } else {
        has_side = !psDec->prev_decode_only_middle
              || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
    }
    /* Call decoder for one frame */
    for( n = 0; n < decControl->nChannelsInternal; n++ ) {
        if( n == 0 || has_side ) {
            opus_int FrameIndex;
            opus_int condCoding;

            /* channel_state[ 0 ].nFramesDecoded has already been incremented
               when n == 1, hence the subtraction of n */
            FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
            /* Use independent coding if no previous frame available */
            if( FrameIndex <= 0 ) {
                condCoding = CODE_INDEPENDENTLY;
            } else if( lostFlag == FLAG_DECODE_LBRR ) {
                condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
            } else if( n > 0 && psDec->prev_decode_only_middle ) {
                /* If we skipped a side frame in this packet, we don't
                   need LTP scaling; the LTP state is well-defined. */
                condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
            } else {
                condCoding = CODE_CONDITIONALLY;
            }
            /* Decoded samples start at index 2; the first two entries of each
               channel buffer are reserved for history samples */
            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, arch);
        } else {
            /* Only reached for n == 1, so nSamplesOutDec was set by the n == 0 pass */
            silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
        }
        channel_state[ n ].nFramesDecoded++;
    }

    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
        /* Convert Mid/Side to Left/Right */
        silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
    } else {
        /* Buffering */
        /* Prepend the two samples saved from the previous frame, then save
           two samples of this frame (at offset nSamplesOutDec) for the next call */
        silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
        silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
    }

    /* Number of output samples */
    *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );

    /* Set up pointers to temp buffers */
    ALLOC( samplesOut2_tmp,
           decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
    if( decControl->nChannelsAPI == 2 ) {
        /* Resample into a temp buffer so the result can be interleaved into samplesOut */
        resample_out_ptr = samplesOut2_tmp;
    } else {
        resample_out_ptr = samplesOut;
    }

    ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
           ? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
           : ALLOC_NONE,
           opus_int16 );
    if ( delay_stack_alloc ) {
       /* Move the decoded samples out of samplesOut before the resampled
          output is written into it */
       OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
       samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
       samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
    }
    for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {

        /* Resample decoded signal to API_sampleRate */
        /* Input starts at index 1, i.e. one sample before the newly decoded data */
        ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );

        /* Interleave if stereo output and stereo stream */
        if( decControl->nChannelsAPI == 2 ) {
            for( i = 0; i < *nSamplesOut; i++ ) {
                samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
            }
        }
    }

    /* Create two channel output from mono stream */
    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
        if ( stereo_to_mono ){
            /* Resample right channel for newly collapsed stereo just in case
               we weren't doing collapsing when switching to mono */
            ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );

            for( i = 0; i < *nSamplesOut; i++ ) {
                samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
            }
        } else {
            /* Duplicate the left channel into the right channel */
            for( i = 0; i < *nSamplesOut; i++ ) {
                samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
            }
        }
    }

    /* Export pitch lag, measured at 48 kHz sampling rate */
    if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
        /* Scale factors from 8 / 12 / 16 kHz to 48 kHz */
        int mult_tab[ 3 ] = { 6, 4, 3 };
        decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
    } else {
        decControl->prevPitchLag = 0;
    }

    if( lostFlag == FLAG_PACKET_LOST ) {
       /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
          if we lose packets when the energy is going down */
       for ( i = 0; i < psDec->nChannelsInternal; i++ )
          psDec->channel_state[ i ].LastGainIndex = 10;
    } else {
       psDec->prev_decode_only_middle = decode_only_middle;
    }
    RESTORE_STACK;
    return ret;
}
386
#if 0
/* Getting table of contents for a packet (currently compiled out).            */
/* Extracts the per-frame VAD flags and the in-band FEC flag from the top bits */
/* of the first payload byte. Returns -1 for an empty payload or a frame count */
/* outside 0..3, SILK_NO_ERROR otherwise.                                      */
opus_int silk_get_TOC(
    const opus_uint8                *payload,           /* I    Payload data                                */
    const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
    const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
    silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
)
{
    opus_int frame, bits;

    /* Reject empty payloads and unsupported frame counts up front */
    if( nBytesIn < 1 || nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
        return -1;
    }

    silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );

    /* For stereo, extract the flags for the mid channel: the top          */
    /* nFramesPerPayload + 1 bits of the first byte                         */
    bits = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );

    /* Lowest extracted bit is the FEC flag; the VAD flags follow, last frame first */
    Silk_TOC->inbandFECFlag = bits & 1;
    frame = nFramesPerPayload;
    while( frame-- > 0 ) {
        bits = silk_RSHIFT( bits, 1 );
        Silk_TOC->VADFlags[ frame ] = bits & 1;
        Silk_TOC->VADFlag |= bits & 1;
    }

    return SILK_NO_ERROR;
}
#endif
420