1 /* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
2 Written by Jean-Marc Valin and Koen Vos */
3 /*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include <stdarg.h>
33 #include "celt.h"
34 #include "entenc.h"
35 #include "modes.h"
36 #include "API.h"
37 #include "stack_alloc.h"
38 #include "float_cast.h"
39 #include "opus.h"
40 #include "arch.h"
41 #include "pitch.h"
42 #include "opus_private.h"
43 #include "os_support.h"
44 #include "cpu_support.h"
45 #include "analysis.h"
46 #include "mathops.h"
47 #include "tuning_parameters.h"
48 #ifdef FIXED_POINT
49 #include "fixed/structs_FIX.h"
50 #else
51 #include "float/structs_FLP.h"
52 #endif
53
/* Sizing unit for the encoder's delay buffer: struct OpusEncoder declares
   delay_buffer with MAX_ENCODER_BUFFER*2 elements. 480 equals Fs/100 at
   48 kHz, the largest encoder_buffer value opus_encoder_init() can set. */
#define MAX_ENCODER_BUFFER 480
55
/* Running state carried between calls to compute_stereo_width(). */
typedef struct {
    /* Smoothed per-frame sums: first-channel energy (XX), cross-product of
       the two channels (XY) and second-channel energy (YY). */
    opus_val32 XX, XY, YY;
    /* Width estimate, smoothed towards the per-frame width by 1/frame_rate. */
    opus_val16 smoothed_width;
    /* Peak follower over smoothed_width that decays slowly each frame. */
    opus_val16 max_follower;
} StereoWidthState;
61
/* Top-level encoder state. The SILK and CELT encoder states are not members:
   they live in the same allocation, after this struct, at the byte offsets
   stored in silk_enc_offset / celt_enc_offset (see opus_encoder_init()). */
struct OpusEncoder {
    int          celt_enc_offset;     /* Byte offset from the start of this struct to the CELT encoder */
    int          silk_enc_offset;     /* Byte offset from the start of this struct to the SILK encoder */
    silk_EncControlStruct silk_mode;  /* SILK control parameters (defaults set in opus_encoder_init()) */
    int          application;         /* OPUS_APPLICATION_* value given to opus_encoder_init() */
    int          channels;            /* Number of input channels (1 or 2) */
    int          delay_compensation;  /* Fs/250 samples, i.e. 4 ms (see opus_encoder_init()) */
    int          force_channels;      /* OPUS_AUTO by default */
    int          signal_type;         /* OPUS_AUTO by default */
    int          user_bandwidth;      /* OPUS_AUTO by default */
    int          max_bandwidth;       /* OPUS_BANDWIDTH_FULLBAND by default */
    int          user_forced_mode;    /* OPUS_AUTO by default */
    int          voice_ratio;         /* -1 when unknown, otherwise a percentage derived from the analysis */
    opus_int32   Fs;                  /* Sampling rate (at the API level) */
    int          use_vbr;             /* 1 by default */
    int          vbr_constraint;      /* 1 by default (constrained VBR) */
    int          variable_duration;   /* OPUS_FRAMESIZE_* selector, OPUS_FRAMESIZE_ARG by default */
    opus_int32   bitrate_bps;         /* Working bitrate, recomputed by opus_encode_native() */
    opus_int32   user_bitrate_bps;    /* OPUS_AUTO, OPUS_BITRATE_MAX or an explicit bitrate */
    int          lsb_depth;           /* 24 by default; caps the lsb_depth passed to opus_encode_native() */
    int          encoder_buffer;      /* Fs/100 samples (see opus_encoder_init()) */
    int          lfe;
    int          arch;                /* Result of opus_select_arch() */
#ifndef DISABLE_FLOAT_API
    TonalityAnalysisState analysis;   /* State handed to run_analysis() */
#endif

/* Names the first member of the part of the state that starts at
   stream_channels. NOTE(review): presumably a state reset clears everything
   from this member onwards; the reset code is not in this part of the file. */
#define OPUS_ENCODER_RESET_START stream_channels
    int          stream_channels;
    opus_int16   hybrid_stereo_width_Q14;   /* Initialised to 1<<14 */
    opus_int32   variable_HP_smth2_Q15;
    opus_val16   prev_HB_gain;              /* Initialised to Q15ONE */
    opus_val32   hp_mem[4];                 /* NOTE(review): presumably the high-pass state used by hp_cutoff()/dc_reject(), two values per channel; confirm at the call site */
    int          mode;
    int          prev_mode;
    int          prev_channels;
    int          prev_framesize;
    int          bandwidth;
    int          silk_bw_switch;
    int          first;                     /* Set to 1 by opus_encoder_init(); NOTE(review): presumably cleared after the first frame; confirm */
    opus_val16 * energy_masking;
    StereoWidthState width_mem;             /* State for compute_stereo_width() */
    opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
#ifndef DISABLE_FLOAT_API
    int          detected_bandwidth;        /* OPUS_BANDWIDTH_* derived from the analysis, 0 when unavailable */
#endif
    opus_uint32  rangeFinal;                /* Zeroed at the start of opus_encode_native() */
};
111
/* Transition tables for voice and music. Each table holds four
   (threshold, hysteresis) pairs, one per bandwidth boundary, in bits/s.
   The first value of a pair is the middle (memoryless) threshold; the
   second is the hysteresis (difference with the middle). */
static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
        11000, 1000, /* NB<->MB */
        14000, 1000, /* MB<->WB */
        17000, 1000, /* WB<->SWB */
        21000, 2000, /* SWB<->FB */
};
static const opus_int32 mono_music_bandwidth_thresholds[8] = {
        12000, 1000, /* NB<->MB */
        15000, 1000, /* MB<->WB */
        18000, 2000, /* WB<->SWB */
        22000, 2000, /* SWB<->FB */
};
static const opus_int32 stereo_voice_bandwidth_thresholds[8] = {
        11000, 1000, /* NB<->MB */
        14000, 1000, /* MB<->WB */
        21000, 2000, /* WB<->SWB */
        28000, 2000, /* SWB<->FB */
};
static const opus_int32 stereo_music_bandwidth_thresholds[8] = {
        12000, 1000, /* NB<->MB */
        18000, 2000, /* MB<->WB */
        21000, 2000, /* WB<->SWB */
        30000, 2000, /* SWB<->FB */
};
/* Threshold bit-rates for switching between mono and stereo */
static const opus_int32 stereo_voice_threshold = 30000;
static const opus_int32 stereo_music_threshold = 30000;

/* Threshold bit-rate for switching between SILK/hybrid and CELT-only.
   Rows are indexed by channel layout (mono, stereo), columns by signal
   class (voice, music). */
static const opus_int32 mode_thresholds[2][2] = {
      /* voice */ /* music */
      {  64000,      16000}, /* mono */
      {  36000,      16000}, /* stereo */
};
149
opus_encoder_get_size(int channels)150 int opus_encoder_get_size(int channels)
151 {
152 int silkEncSizeBytes, celtEncSizeBytes;
153 int ret;
154 if (channels<1 || channels > 2)
155 return 0;
156 ret = silk_Get_Encoder_Size( &silkEncSizeBytes );
157 if (ret)
158 return 0;
159 silkEncSizeBytes = align(silkEncSizeBytes);
160 celtEncSizeBytes = celt_encoder_get_size(channels);
161 return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes;
162 }
163
/* Initialises an encoder state in memory provided by the caller.

   st          : buffer of at least opus_encoder_get_size(channels) bytes; it
                 is cleared completely before anything else is written.
   Fs          : sampling rate, one of 8000, 12000, 16000, 24000 or 48000.
   channels    : 1 or 2.
   application : OPUS_APPLICATION_VOIP, OPUS_APPLICATION_AUDIO or
                 OPUS_APPLICATION_RESTRICTED_LOWDELAY.

   Returns OPUS_OK on success, OPUS_BAD_ARG for an unsupported argument (or a
   failing SILK size query) and OPUS_INTERNAL_ERROR when the SILK or CELT
   sub-encoder cannot be initialised. */
int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application)
{
    void *silk_state;
    CELTEncoder *celt_state;
    silk_EncControlStruct *silk_ctl;
    int silk_bytes;

    if (Fs!=48000 && Fs!=24000 && Fs!=16000 && Fs!=12000 && Fs!=8000)
        return OPUS_BAD_ARG;
    if (channels!=1 && channels!=2)
        return OPUS_BAD_ARG;
    if (application != OPUS_APPLICATION_VOIP
        && application != OPUS_APPLICATION_AUDIO
        && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
        return OPUS_BAD_ARG;

    OPUS_CLEAR((char*)st, opus_encoder_get_size(channels));

    /* Lay out the SILK and CELT states behind the OpusEncoder header */
    if (silk_Get_Encoder_Size( &silk_bytes ) != 0)
        return OPUS_BAD_ARG;
    silk_bytes = align(silk_bytes);
    st->silk_enc_offset = align(sizeof(OpusEncoder));
    st->celt_enc_offset = st->silk_enc_offset + silk_bytes;
    silk_state = (char*)st + st->silk_enc_offset;
    celt_state = (CELTEncoder*)((char*)st + st->celt_enc_offset);

    st->channels = channels;
    st->stream_channels = channels;
    st->Fs = Fs;
    st->arch = opus_select_arch();

    /* SILK encoder; the defaults below are written after it has filled in
       the control struct */
    if (silk_InitEncoder( silk_state, st->arch, &st->silk_mode ) != 0)
        return OPUS_INTERNAL_ERROR;

    silk_ctl = &st->silk_mode;
    silk_ctl->nChannelsAPI              = channels;
    silk_ctl->nChannelsInternal         = channels;
    silk_ctl->API_sampleRate            = st->Fs;
    silk_ctl->maxInternalSampleRate     = 16000;
    silk_ctl->minInternalSampleRate     = 8000;
    silk_ctl->desiredInternalSampleRate = 16000;
    silk_ctl->payloadSize_ms            = 20;
    silk_ctl->bitRate                   = 25000;
    silk_ctl->packetLossPercentage      = 0;
    silk_ctl->complexity                = 9;
    silk_ctl->useInBandFEC              = 0;
    silk_ctl->useDTX                    = 0;
    silk_ctl->useCBR                    = 0;
    silk_ctl->reducedDependency         = 0;

    /* CELT encoder, configured without signalling and with the same
       complexity as SILK */
    if (celt_encoder_init(celt_state, Fs, channels, st->arch) != OPUS_OK)
        return OPUS_INTERNAL_ERROR;
    celt_encoder_ctl(celt_state, CELT_SET_SIGNALLING(0));
    celt_encoder_ctl(celt_state, OPUS_SET_COMPLEXITY(silk_ctl->complexity));

    /* User-visible settings. Constrained VBR is the default because it is
       safer for real-time use. */
    st->application = application;
    st->use_vbr = 1;
    st->vbr_constraint = 1;
    st->user_bitrate_bps = OPUS_AUTO;
    st->bitrate_bps = 3000+Fs*channels;
    st->signal_type = OPUS_AUTO;
    st->user_bandwidth = OPUS_AUTO;
    st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
    st->force_channels = OPUS_AUTO;
    st->user_forced_mode = OPUS_AUTO;
    st->voice_ratio = -1;
    st->lsb_depth = 24;
    st->variable_duration = OPUS_FRAMESIZE_ARG;
    st->encoder_buffer = st->Fs/100;

    /* 4 ms of delay compensation: 2.5 ms for SILK's extra look-ahead plus
       1.5 ms for the SILK resamplers and stereo prediction */
    st->delay_compensation = st->Fs/250;

    /* Per-stream running state */
    st->hybrid_stereo_width_Q14 = 1 << 14;
    st->prev_HB_gain = Q15ONE;
    st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
    st->first = 1;
    st->mode = MODE_HYBRID;
    st->bandwidth = OPUS_BANDWIDTH_FULLBAND;

#ifndef DISABLE_FLOAT_API
    tonality_analysis_init(&st->analysis);
#endif

    return OPUS_OK;
}
253
gen_toc(int mode,int framerate,int bandwidth,int channels)254 static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels)
255 {
256 int period;
257 unsigned char toc;
258 period = 0;
259 while (framerate < 400)
260 {
261 framerate <<= 1;
262 period++;
263 }
264 if (mode == MODE_SILK_ONLY)
265 {
266 toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5;
267 toc |= (period-2)<<3;
268 } else if (mode == MODE_CELT_ONLY)
269 {
270 int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND;
271 if (tmp < 0)
272 tmp = 0;
273 toc = 0x80;
274 toc |= tmp << 5;
275 toc |= period<<3;
276 } else /* Hybrid */
277 {
278 toc = 0x60;
279 toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4;
280 toc |= (period-2)<<3;
281 }
282 toc |= (channels==2)<<2;
283 return toc;
284 }
285
286 #ifndef FIXED_POINT
silk_biquad_float(const opus_val16 * in,const opus_int32 * B_Q28,const opus_int32 * A_Q28,opus_val32 * S,opus_val16 * out,const opus_int32 len,int stride)287 static void silk_biquad_float(
288 const opus_val16 *in, /* I: Input signal */
289 const opus_int32 *B_Q28, /* I: MA coefficients [3] */
290 const opus_int32 *A_Q28, /* I: AR coefficients [2] */
291 opus_val32 *S, /* I/O: State vector [2] */
292 opus_val16 *out, /* O: Output signal */
293 const opus_int32 len, /* I: Signal length (must be even) */
294 int stride
295 )
296 {
297 /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
298 opus_int k;
299 opus_val32 vout;
300 opus_val32 inval;
301 opus_val32 A[2], B[3];
302
303 A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28)));
304 A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28)));
305 B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28)));
306 B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28)));
307 B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28)));
308
309 /* Negate A_Q28 values and split in two parts */
310
311 for( k = 0; k < len; k++ ) {
312 /* S[ 0 ], S[ 1 ]: Q12 */
313 inval = in[ k*stride ];
314 vout = S[ 0 ] + B[0]*inval;
315
316 S[ 0 ] = S[1] - vout*A[0] + B[1]*inval;
317
318 S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL;
319
320 /* Scale back to Q0 and saturate */
321 out[ k*stride ] = vout;
322 }
323 }
324 #endif
325
/* High-pass filters len interleaved frames from in to out with a biquad
   whose coefficients are derived from cutoff_Hz and the sampling rate Fs.
   hp_mem holds two state values per channel: the first channel uses
   hp_mem[0..1], and a second channel (channels == 2) uses hp_mem[2..3]. */
static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
{
    opus_int32 num_Q28[ 3 ], den_Q28[ 2 ];
    opus_int32 Fc_Q19, r_Q28, r_Q22;

    silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) );
    Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 );
    silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 );

    r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 );
    r_Q22 = silk_RSHIFT( r_Q28, 6 );

    /* Numerator: b = r * [ 1; -2; 1 ] */
    num_Q28[ 0 ] = r_Q28;
    num_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 );
    num_Q28[ 2 ] = r_Q28;

    /* Denominator: a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ], of which the
       last two terms are stored; the middle one is -r * ( 2 - Fc * Fc ) */
    den_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0,  22 ) );
    den_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 );

    /* The channel count doubles as the sample stride of the interleaved data */
#ifdef FIXED_POINT
    silk_biquad_alt( in, num_Q28, den_Q28, hp_mem, out, len, channels );
    if( channels == 2 ) {
        silk_biquad_alt( in+1, num_Q28, den_Q28, hp_mem+2, out+1, len, channels );
    }
#else
    silk_biquad_float( in, num_Q28, den_Q28, hp_mem, out, len, channels );
    if( channels == 2 ) {
        silk_biquad_float( in+1, num_Q28, den_Q28, hp_mem+2, out+1, len, channels );
    }
#endif
}
360
361 #ifdef FIXED_POINT
dc_reject(const opus_val16 * in,opus_int32 cutoff_Hz,opus_val16 * out,opus_val32 * hp_mem,int len,int channels,opus_int32 Fs)362 static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
363 {
364 int c, i;
365 int shift;
366
367 /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */
368 shift=celt_ilog2(Fs/(cutoff_Hz*3));
369 for (c=0;c<channels;c++)
370 {
371 for (i=0;i<len;i++)
372 {
373 opus_val32 x, tmp, y;
374 x = SHL32(EXTEND32(in[channels*i+c]), 15);
375 /* First stage */
376 tmp = x-hp_mem[2*c];
377 hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift);
378 /* Second stage */
379 y = tmp - hp_mem[2*c+1];
380 hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift);
381 out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767));
382 }
383 }
384 }
385
386 #else
dc_reject(const opus_val16 * in,opus_int32 cutoff_Hz,opus_val16 * out,opus_val32 * hp_mem,int len,int channels,opus_int32 Fs)387 static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
388 {
389 int c, i;
390 float coef;
391
392 coef = 4.0f*cutoff_Hz/Fs;
393 for (c=0;c<channels;c++)
394 {
395 for (i=0;i<len;i++)
396 {
397 opus_val32 x, tmp, y;
398 x = in[channels*i+c];
399 /* First stage */
400 tmp = x-hp_mem[2*c];
401 hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]) + VERY_SMALL;
402 /* Second stage */
403 y = tmp - hp_mem[2*c+1];
404 hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]) + VERY_SMALL;
405 out[channels*i+c] = y;
406 }
407 }
408 }
409 #endif
410
/* Narrows the stereo image of out in place by moving each pair of samples
   towards its average: half the in-channel difference, scaled by a gain, is
   subtracted from the first channel and added to the second. The gain is
   (Q15ONE - g1) at the start of the frame and cross-fades to (Q15ONE - g2)
   over the overlap region using the squared window; after the overlap it
   stays at (Q15ONE - g2). overlap48 and window are expressed at 48 kHz and
   are decimated by 48000/Fs. */
static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
{
    int n;
    int step;
    int overlap;

    step = 48000/Fs;
    overlap = overlap48/step;
    g1 = Q15ONE-g1;
    g2 = Q15ONE-g2;

    /* One pass over the cross-fade region followed by the steady region */
    for (n=0; n<overlap || n<frame_size; n++)
    {
        opus_val32 diff;
        opus_val16 g;

        if (n<overlap)
        {
            opus_val16 w;
            w = MULT16_16_Q15(window[n*step], window[n*step]);
            g = SHR32(MAC16_16(MULT16_16(w,g2),
                  Q15ONE-w, g1), 15);
        } else {
            g = g2;
        }
        diff = EXTRACT16(HALF32((opus_val32)in[n*channels] - (opus_val32)in[n*channels+1]));
        diff = MULT16_16_Q15(g, diff);
        out[n*channels] = out[n*channels] - diff;
        out[n*channels+1] = out[n*channels+1] + diff;
    }
}
442
/* Applies a gain to an interleaved frame, writing the result to out. Over
   the overlap region the gain cross-fades from g1 to g2 using the squared
   window; for the remainder of the frame it is g2. overlap48 and window are
   expressed at 48 kHz and are decimated by 48000/Fs. The cross-fade handles
   one channel when channels == 1 and exactly two channels otherwise. */
static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
{
    int n;
    int ch;
    int step;
    int overlap;

    step = 48000/Fs;
    overlap = overlap48/step;

    /* Cross-fade region */
    for (n=0;n<overlap;n++)
    {
        opus_val16 g, w;
        w = MULT16_16_Q15(window[n*step], window[n*step]);
        g = SHR32(MAC16_16(MULT16_16(w,g2),
              Q15ONE-w, g1), 15);
        if (channels==1)
        {
            out[n] = MULT16_16_Q15(g, in[n]);
        } else {
            out[n*2] = MULT16_16_Q15(g, in[n*2]);
            out[n*2+1] = MULT16_16_Q15(g, in[n*2+1]);
        }
    }

    /* Steady region, one channel at a time */
    ch = 0;
    do {
        for (n=overlap;n<frame_size;n++)
            out[n*channels+ch] = MULT16_16_Q15(g2, in[n*channels+ch]);
    } while (++ch<channels);
}
481
opus_encoder_create(opus_int32 Fs,int channels,int application,int * error)482 OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error)
483 {
484 int ret;
485 OpusEncoder *st;
486 if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)||
487 (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO
488 && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY))
489 {
490 if (error)
491 *error = OPUS_BAD_ARG;
492 return NULL;
493 }
494 st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels));
495 if (st == NULL)
496 {
497 if (error)
498 *error = OPUS_ALLOC_FAIL;
499 return NULL;
500 }
501 ret = opus_encoder_init(st, Fs, channels, application);
502 if (error)
503 *error = ret;
504 if (ret != OPUS_OK)
505 {
506 opus_free(st);
507 st = NULL;
508 }
509 return st;
510 }
511
user_bitrate_to_bitrate(OpusEncoder * st,int frame_size,int max_data_bytes)512 static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes)
513 {
514 if(!frame_size)frame_size=st->Fs/400;
515 if (st->user_bitrate_bps==OPUS_AUTO)
516 return 60*st->Fs/frame_size + st->Fs*st->channels;
517 else if (st->user_bitrate_bps==OPUS_BITRATE_MAX)
518 return max_data_bytes*8*st->Fs/frame_size;
519 else
520 return st->user_bitrate_bps;
521 }
522
523 #ifndef DISABLE_FLOAT_API
524 /* Don't use more than 60 ms for the frame size analysis */
525 #define MAX_DYNAMIC_FRAMESIZE 24
526 /* Estimates how much the bitrate will be boosted based on the sub-frame energy */
transient_boost(const float * E,const float * E_1,int LM,int maxM)527 static float transient_boost(const float *E, const float *E_1, int LM, int maxM)
528 {
529 int i;
530 int M;
531 float sumE=0, sumE_1=0;
532 float metric;
533
534 M = IMIN(maxM, (1<<LM)+1);
535 for (i=0;i<M;i++)
536 {
537 sumE += E[i];
538 sumE_1 += E_1[i];
539 }
540 metric = sumE*sumE_1/(M*M);
541 /*if (LM==3)
542 printf("%f\n", metric);*/
543 /*return metric>10 ? 1 : 0;*/
544 /*return MAX16(0,1-exp(-.25*(metric-2.)));*/
545 return MIN16(1,(float)sqrt(MAX16(0,.05f*(metric-2))));
546 }
547
/* Viterbi decoding trying to find the best frame size combination using look-ahead

   E and E_1 hold the sub-frame energies and their inverses (both are passed
   on to transient_boost()), N is the number of sub-frames to analyse,
   frame_cost is a fixed cost added per frame and rate is the cost per
   sub-frame (the caller passes bitrate/400). The return value is the
   size index (0..3, i.e. a frame of 1<<index sub-frames) chosen for the
   frame that starts at sub-frame 0.

   State numbering:
    0: unused
    1:  2.5 ms
    2:  5 ms (#1)
    3:  5 ms (#2)
    4: 10 ms (#1)
    5: 10 ms (#2)
    6: 10 ms (#3)
    7: 10 ms (#4)
    8: 20 ms (#1)
    9: 20 ms (#2)
   10: 20 ms (#3)
   11: 20 ms (#4)
   12: 20 ms (#5)
   13: 20 ms (#6)
   14: 20 ms (#7)
   15: 20 ms (#8)

   A frame of 1<<j sub-frames therefore starts in state 1<<j and ends in
   state (1<<(j+1))-1, advancing by one state per sub-frame.
*/
static int transient_viterbi(const float *E, const float *E_1, int N, int frame_cost, int rate)
{
   int i;
   float cost[MAX_DYNAMIC_FRAMESIZE][16];
   int states[MAX_DYNAMIC_FRAMESIZE][16];
   float best_cost;
   int best_state;
   float factor;
   /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range.
      With rate = bitrate/400, the bounds 80 and 160 correspond to 32 kb/s
      and 64 kb/s. */
   /* Makes variable framesize less aggressive at lower bitrates, but I can't
      find any valid theoretical justification for this (other than it seems
      to help) */
   if (rate<80)
      factor=0;
   else if (rate>160)
      factor=1;
   else
      factor = (rate-80.f)/80.f;
   for (i=0;i<16;i++)
   {
      /* Impossible state */
      states[0][i] = -1;
      cost[0][i] = 1e10;
   }
   /* At sub-frame 0 only the frame-start states are reachable; their
      back-pointer stores the size index itself, which is what the final
      back-tracking step returns. */
   for (i=0;i<4;i++)
   {
      cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1));
      states[0][1<<i] = i;
   }
   for (i=1;i<N;i++)
   {
      int j;

      /* Follow continuations */
      for (j=2;j<16;j++)
      {
         cost[i][j] = cost[i-1][j-1];
         states[i][j] = j-1;
      }

      /* New frames */
      for(j=0;j<4;j++)
      {
         int k;
         float min_cost;
         float curr_cost;
         /* A new frame can only follow a frame-end state: 1, 3, 7 or 15.
            Pick the cheapest of those as the predecessor. */
         states[i][1<<j] = 1;
         min_cost = cost[i-1][1];
         for(k=1;k<4;k++)
         {
            float tmp = cost[i-1][(1<<(k+1))-1];
            if (tmp < min_cost)
            {
               states[i][1<<j] = (1<<(k+1))-1;
               min_cost = tmp;
            }
         }
         curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1));
         cost[i][1<<j] = min_cost;
         /* If part of the frame is outside the analysis window, only count part of the cost */
         if (N-i < (1<<j))
            cost[i][1<<j] += curr_cost*(float)(N-i)/(1<<j);
         else
            cost[i][1<<j] += curr_cost;
      }
   }

   best_state=1;
   best_cost = cost[N-1][1];
   /* Find best end state (doesn't force a frame to end at N-1) */
   for (i=2;i<16;i++)
   {
      if (cost[N-1][i]<best_cost)
      {
         best_cost = cost[N-1][i];
         best_state = i;
      }
   }

   /* Follow transitions back */
   for (i=N-1;i>=0;i--)
   {
      /*printf("%d ", best_state);*/
      best_state = states[i][best_state];
   }
   /*printf("%d\n", best_state);*/
   return best_state;
}
656
/* Chooses a frame size for the start of the input by measuring the energy of
   each 2.5 ms sub-frame and running transient_viterbi() over the result.

   x, C, downmix : input samples, channel count and the callback that mixes
                   them down into one sub-frame of opus_val32 samples.
   len           : number of samples per channel available in x.
   bitrate       : target bitrate; passed to the search as bitrate/400.
   tonality      : scales the per-frame cost by (1 + 0.5*tonality).
   mem           : energies carried over from the previous call (one value,
                   or three when buffering is non-zero); updated on return.
   buffering     : when non-zero, the amount of input already buffered.

   Returns the size index chosen by the search (frame of 1<<index
   sub-frames). */
static int optimize_framesize(const void *x, int len, int C, opus_int32 Fs,
                int bitrate, opus_val16 tonality, float *mem, int buffering,
                downmix_func downmix)
{
    float e[MAX_DYNAMIC_FRAMESIZE+4];
    float e_1[MAX_DYNAMIC_FRAMESIZE+3];
    opus_val32 prev;
    int bestLM=0;
    int subframe;
    int history;
    int offset;
    int N;
    int i;
    int k;
    VARDECL(opus_val32, sub);

    subframe = Fs/400;
    ALLOC(sub, subframe, opus_val32);

    if (buffering)
    {
        /* Consider the CELT delay when not in restricted-lowdelay */
        /* We assume the buffering is between 2.5 and 5 ms */
        offset = 2*subframe - buffering;
        celt_assert(offset>=0 && offset <= subframe);
        len -= offset;
        history = 3;
    } else {
        offset = 0;
        history = 1;
    }

    /* Seed the energy arrays with the values remembered from the last call */
    for (k=0;k<history;k++)
    {
        e[k] = mem[k];
        e_1[k] = 1.f/(EPSILON+mem[k]);
    }

    N = IMIN(len/subframe, MAX_DYNAMIC_FRAMESIZE);
    /* Only here to silence a warning; the real value is set on the first
       sub-frame */
    prev = 0;
    for (i=0;i<N;i++)
    {
        float energy;
        opus_val32 cur;
        int j;

        energy = EPSILON;
        downmix(x, sub, subframe, i*subframe+offset, 0, -2, C);
        if (i==0)
            prev = sub[0];
        /* Energy of the first difference of the down-mixed signal */
        for (j=0;j<subframe;j++)
        {
            cur = sub[j];
            energy += (cur-prev)*(float)(cur-prev);
            prev = cur;
        }
        e[i+history] = energy;
        e_1[i+history] = 1.f/energy;
    }
    /* Hack to get 20 ms working with APPLICATION_AUDIO
       The real problem is that the corresponding memory needs to use 1.5 ms
       from this frame and 1 ms from the next frame */
    e[i+history] = e[i+history-1];
    if (buffering)
        N = IMIN(MAX_DYNAMIC_FRAMESIZE, N+2);

    bestLM = transient_viterbi(e, e_1, N, (int)((1.f+.5f*tonality)*(60*C+40)), bitrate/400);

    /* Remember the energies that follow the chosen frame for the next call */
    mem[0] = e[1<<bestLM];
    if (buffering)
    {
        mem[1] = e[(1<<bestLM)+1];
        mem[2] = e[(1<<bestLM)+2];
    }
    return bestLM;
}
729
730 #endif
731
732 #ifndef DISABLE_FLOAT_API
733 #ifdef FIXED_POINT
734 #define PCM2VAL(x) FLOAT2INT16(x)
735 #else
736 #define PCM2VAL(x) SCALEIN(x)
737 #endif
downmix_float(const void * _x,opus_val32 * sub,int subframe,int offset,int c1,int c2,int C)738 void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
739 {
740 const float *x;
741 opus_val32 scale;
742 int j;
743 x = (const float *)_x;
744 for (j=0;j<subframe;j++)
745 sub[j] = PCM2VAL(x[(j+offset)*C+c1]);
746 if (c2>-1)
747 {
748 for (j=0;j<subframe;j++)
749 sub[j] += PCM2VAL(x[(j+offset)*C+c2]);
750 } else if (c2==-2)
751 {
752 int c;
753 for (c=1;c<C;c++)
754 {
755 for (j=0;j<subframe;j++)
756 sub[j] += PCM2VAL(x[(j+offset)*C+c]);
757 }
758 }
759 #ifdef FIXED_POINT
760 scale = (1<<SIG_SHIFT);
761 #else
762 scale = 1.f;
763 #endif
764 if (C==-2)
765 scale /= C;
766 else
767 scale /= 2;
768 for (j=0;j<subframe;j++)
769 sub[j] *= scale;
770 }
771 #endif
772
downmix_int(const void * _x,opus_val32 * sub,int subframe,int offset,int c1,int c2,int C)773 void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C)
774 {
775 const opus_int16 *x;
776 opus_val32 scale;
777 int j;
778 x = (const opus_int16 *)_x;
779 for (j=0;j<subframe;j++)
780 sub[j] = x[(j+offset)*C+c1];
781 if (c2>-1)
782 {
783 for (j=0;j<subframe;j++)
784 sub[j] += x[(j+offset)*C+c2];
785 } else if (c2==-2)
786 {
787 int c;
788 for (c=1;c<C;c++)
789 {
790 for (j=0;j<subframe;j++)
791 sub[j] += x[(j+offset)*C+c];
792 }
793 }
794 #ifdef FIXED_POINT
795 scale = (1<<SIG_SHIFT);
796 #else
797 scale = 1.f/32768;
798 #endif
799 if (C==-2)
800 scale /= C;
801 else
802 scale /= 2;
803 for (j=0;j<subframe;j++)
804 sub[j] *= scale;
805 }
806
/* Picks the frame size, in samples, to encode from a buffer of frame_size
   samples according to the OPUS_FRAMESIZE_* selector in variable_duration:
     OPUS_FRAMESIZE_ARG                -> use frame_size itself
     OPUS_FRAMESIZE_VARIABLE           -> 20 ms (Fs/50)
     OPUS_FRAMESIZE_2_5_MS ... _60_MS  -> that duration, capped at 60 ms
   Returns -1 when the buffer is shorter than 2.5 ms, the selector is
   unknown, the selected size exceeds frame_size, or the selected size is not
   one of 2.5, 5, 10, 20, 40 or 60 ms. */
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
{
    int selected;

    if (frame_size < Fs/400)
        return -1;

    if (variable_duration == OPUS_FRAMESIZE_ARG)
    {
        selected = frame_size;
    }
    else if (variable_duration == OPUS_FRAMESIZE_VARIABLE)
    {
        selected = Fs/50;
    }
    else if (variable_duration < OPUS_FRAMESIZE_2_5_MS || variable_duration > OPUS_FRAMESIZE_60_MS)
    {
        return -1;
    }
    else
    {
        selected = IMIN(3*Fs/50, (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS));
    }

    if (selected > frame_size)
        return -1;

    /* Accept only the durations the codec supports */
    if (400*selected==Fs || 200*selected==Fs || 100*selected==Fs ||
        50*selected==Fs || 25*selected==Fs || 50*selected==3*Fs)
        return selected;
    return -1;
}
827
/* Decides how many samples of the caller's buffer to encode as one frame.

   When the float API is available, variable_duration is
   OPUS_FRAMESIZE_VARIABLE and at least 5 ms of input is present, the size is
   chosen by optimize_framesize() and then reduced until it fits into
   frame_size. In every other case the decision is delegated to
   frame_size_select(). Returns the frame size in samples, or -1 on error. */
opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size,
      int variable_duration, int C, opus_int32 Fs, int bitrate_bps,
      int delay_compensation, downmix_func downmix
#ifndef DISABLE_FLOAT_API
      , float *subframe_mem
#endif
      )
{
#ifndef DISABLE_FLOAT_API
    if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200)
    {
        int lm;
        lm = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps,
                                0, subframe_mem, delay_compensation, downmix);
        /* Never pick a frame longer than the input that was supplied */
        while (((Fs/400)<<lm) > frame_size)
            lm--;
        frame_size = (Fs/400)<<lm;
    } else {
        frame_size = frame_size_select(frame_size, variable_duration, Fs);
    }
#else
    (void)analysis_pcm;
    (void)C;
    (void)bitrate_bps;
    (void)delay_compensation;
    (void)downmix;
    frame_size = frame_size_select(frame_size, variable_duration, Fs);
#endif
    return frame_size < 0 ? -1 : frame_size;
}
860
/* Estimates the stereo width of an interleaved two-channel frame and updates
   the running state in mem.

   The frame's channel energies and cross-product are accumulated, smoothed
   into mem->XX / XY / YY and clamped at zero. When the larger smoothed
   energy exceeds a small threshold, a width value is derived from the
   inter-channel correlation and an approximate loudness difference,
   smoothed over roughly one second and tracked by a slowly decaying peak
   follower. The return value is 20 times the peak follower, capped at
   Q15ONE. */
opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
{
    opus_val32 xx, xy, yy;
    opus_val16 short_alpha;
    int frame_rate;
    int i;

    frame_rate = Fs/frame_size;
    short_alpha = Q15ONE - MULT16_16(25, Q15ONE)/IMAX(50,frame_rate);
    xx = 0;
    xy = 0;
    yy = 0;
    /* Four frames per iteration. The frame size is always a multiple of 4
       *except* for 2.5 ms frames at 12 kHz; in that rare case the last two
       samples are simply left out. */
    for (i=0;i<frame_size-3;i+=4)
    {
        const opus_val16 *s = pcm + 2*i;
        opus_val32 pxx, pxy, pyy;

        pxx = SHR32(MULT16_16(s[0],s[0]),2);
        pxy = SHR32(MULT16_16(s[0],s[1]),2);
        pyy = SHR32(MULT16_16(s[1],s[1]),2);

        pxx += SHR32(MULT16_16(s[2],s[2]),2);
        pxy += SHR32(MULT16_16(s[2],s[3]),2);
        pyy += SHR32(MULT16_16(s[3],s[3]),2);

        pxx += SHR32(MULT16_16(s[4],s[4]),2);
        pxy += SHR32(MULT16_16(s[4],s[5]),2);
        pyy += SHR32(MULT16_16(s[5],s[5]),2);

        pxx += SHR32(MULT16_16(s[6],s[6]),2);
        pxy += SHR32(MULT16_16(s[6],s[7]),2);
        pyy += SHR32(MULT16_16(s[7],s[7]),2);

        xx += SHR32(pxx, 10);
        xy += SHR32(pxy, 10);
        yy += SHR32(pyy, 10);
    }

    /* Smooth each statistic towards this frame's value, never below zero */
    mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX);
    mem->XX = MAX32(0, mem->XX);
    mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY);
    mem->XY = MAX32(0, mem->XY);
    mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY);
    mem->YY = MAX32(0, mem->YY);

    if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18))
    {
        opus_val16 sqrt_xx, sqrt_yy;
        opus_val16 qrrt_xx, qrrt_yy;
        opus_val16 corr;
        opus_val16 ldiff;
        opus_val16 width;

        sqrt_xx = celt_sqrt(mem->XX);
        sqrt_yy = celt_sqrt(mem->YY);
        qrrt_xx = celt_sqrt(sqrt_xx);
        qrrt_yy = celt_sqrt(sqrt_yy);
        /* Inter-channel correlation; the cross term is first limited to the
           product of the two magnitudes */
        mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy);
        corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16);
        /* Approximate loudness difference */
        ldiff = MULT16_16(Q15ONE, ABS16(qrrt_xx-qrrt_yy))/(EPSILON+qrrt_xx+qrrt_yy);
        width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff);
        /* Smoothing over one second */
        mem->smoothed_width += (width-mem->smoothed_width)/frame_rate;
        /* Peak follower */
        mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width);
    }
    return EXTRACT16(MIN32(Q15ONE, MULT16_16(20, mem->max_follower)));
}
936
opus_encode_native(OpusEncoder * st,const opus_val16 * pcm,int frame_size,unsigned char * data,opus_int32 out_data_bytes,int lsb_depth,const void * analysis_pcm,opus_int32 analysis_size,int c1,int c2,int analysis_channels,downmix_func downmix,int float_api)937 opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
938 unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
939 const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
940 int analysis_channels, downmix_func downmix, int float_api)
941 {
942 void *silk_enc;
943 CELTEncoder *celt_enc;
944 int i;
945 int ret=0;
946 opus_int32 nBytes;
947 ec_enc enc;
948 int bytes_target;
949 int prefill=0;
950 int start_band = 0;
951 int redundancy = 0;
952 int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
953 int celt_to_silk = 0;
954 VARDECL(opus_val16, pcm_buf);
955 int nb_compr_bytes;
956 int to_celt = 0;
957 opus_uint32 redundant_rng = 0;
958 int cutoff_Hz, hp_freq_smth1;
959 int voice_est; /* Probability of voice in Q7 */
960 opus_int32 equiv_rate;
961 int delay_compensation;
962 int frame_rate;
963 opus_int32 max_rate; /* Max bitrate we're allowed to use */
964 int curr_bandwidth;
965 opus_val16 HB_gain;
966 opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
967 int total_buffer;
968 opus_val16 stereo_width;
969 const CELTMode *celt_mode;
970 #ifndef DISABLE_FLOAT_API
971 AnalysisInfo analysis_info;
972 int analysis_read_pos_bak=-1;
973 int analysis_read_subframe_bak=-1;
974 #endif
975 VARDECL(opus_val16, tmp_prefill);
976
977 ALLOC_STACK;
978
979 max_data_bytes = IMIN(1276, out_data_bytes);
980
981 st->rangeFinal = 0;
982 if ((!st->variable_duration && 400*frame_size != st->Fs && 200*frame_size != st->Fs && 100*frame_size != st->Fs &&
983 50*frame_size != st->Fs && 25*frame_size != st->Fs && 50*frame_size != 3*st->Fs)
984 || (400*frame_size < st->Fs)
985 || max_data_bytes<=0
986 )
987 {
988 RESTORE_STACK;
989 return OPUS_BAD_ARG;
990 }
991 silk_enc = (char*)st+st->silk_enc_offset;
992 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
993 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
994 delay_compensation = 0;
995 else
996 delay_compensation = st->delay_compensation;
997
998 lsb_depth = IMIN(lsb_depth, st->lsb_depth);
999
1000 celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
1001 #ifndef DISABLE_FLOAT_API
1002 analysis_info.valid = 0;
1003 #ifdef FIXED_POINT
1004 if (st->silk_mode.complexity >= 10 && st->Fs==48000)
1005 #else
1006 if (st->silk_mode.complexity >= 7 && st->Fs==48000)
1007 #endif
1008 {
1009 analysis_read_pos_bak = st->analysis.read_pos;
1010 analysis_read_subframe_bak = st->analysis.read_subframe;
1011 run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
1012 c1, c2, analysis_channels, st->Fs,
1013 lsb_depth, downmix, &analysis_info);
1014 }
1015 #else
1016 (void)analysis_pcm;
1017 (void)analysis_size;
1018 #endif
1019
1020 st->voice_ratio = -1;
1021
1022 #ifndef DISABLE_FLOAT_API
1023 st->detected_bandwidth = 0;
1024 if (analysis_info.valid)
1025 {
1026 int analysis_bandwidth;
1027 if (st->signal_type == OPUS_AUTO)
1028 st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
1029
1030 analysis_bandwidth = analysis_info.bandwidth;
1031 if (analysis_bandwidth<=12)
1032 st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
1033 else if (analysis_bandwidth<=14)
1034 st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
1035 else if (analysis_bandwidth<=16)
1036 st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
1037 else if (analysis_bandwidth<=18)
1038 st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
1039 else
1040 st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;
1041 }
1042 #endif
1043
1044 if (st->channels==2 && st->force_channels!=1)
1045 stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem);
1046 else
1047 stereo_width = 0;
1048 total_buffer = delay_compensation;
1049 st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
1050
1051 frame_rate = st->Fs/frame_size;
1052 if (!st->use_vbr)
1053 {
1054 int cbrBytes;
1055 /* Multiply by 3 to make sure the division is exact. */
1056 int frame_rate3 = 3*st->Fs/frame_size;
1057 /* We need to make sure that "int" values always fit in 16 bits. */
1058 cbrBytes = IMIN( (3*st->bitrate_bps/8 + frame_rate3/2)/frame_rate3, max_data_bytes);
1059 st->bitrate_bps = cbrBytes*(opus_int32)frame_rate3*8/3;
1060 max_data_bytes = cbrBytes;
1061 }
1062 if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
1063 || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
1064 {
1065 /*If the space is too low to do something useful, emit 'PLC' frames.*/
1066 int tocmode = st->mode;
1067 int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth;
1068 if (tocmode==0)
1069 tocmode = MODE_SILK_ONLY;
1070 if (frame_rate>100)
1071 tocmode = MODE_CELT_ONLY;
1072 if (frame_rate < 50)
1073 tocmode = MODE_SILK_ONLY;
1074 if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND)
1075 bw=OPUS_BANDWIDTH_WIDEBAND;
1076 else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND)
1077 bw=OPUS_BANDWIDTH_NARROWBAND;
1078 else if (tocmode==MODE_HYBRID&&bw<=OPUS_BANDWIDTH_SUPERWIDEBAND)
1079 bw=OPUS_BANDWIDTH_SUPERWIDEBAND;
1080 data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels);
1081 ret = 1;
1082 if (!st->use_vbr)
1083 {
1084 ret = opus_packet_pad(data, ret, max_data_bytes);
1085 if (ret == OPUS_OK)
1086 ret = max_data_bytes;
1087 }
1088 RESTORE_STACK;
1089 return ret;
1090 }
1091 max_rate = frame_rate*max_data_bytes*8;
1092
1093 /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
1094 equiv_rate = st->bitrate_bps - (40*st->channels+20)*(st->Fs/frame_size - 50);
1095
1096 if (st->signal_type == OPUS_SIGNAL_VOICE)
1097 voice_est = 127;
1098 else if (st->signal_type == OPUS_SIGNAL_MUSIC)
1099 voice_est = 0;
1100 else if (st->voice_ratio >= 0)
1101 {
1102 voice_est = st->voice_ratio*327>>8;
1103 /* For AUDIO, never be more than 90% confident of having speech */
1104 if (st->application == OPUS_APPLICATION_AUDIO)
1105 voice_est = IMIN(voice_est, 115);
1106 } else if (st->application == OPUS_APPLICATION_VOIP)
1107 voice_est = 115;
1108 else
1109 voice_est = 48;
1110
1111 if (st->force_channels!=OPUS_AUTO && st->channels == 2)
1112 {
1113 st->stream_channels = st->force_channels;
1114 } else {
1115 #ifdef FUZZING
1116 /* Random mono/stereo decision */
1117 if (st->channels == 2 && (rand()&0x1F)==0)
1118 st->stream_channels = 3-st->stream_channels;
1119 #else
1120 /* Rate-dependent mono-stereo decision */
1121 if (st->channels == 2)
1122 {
1123 opus_int32 stereo_threshold;
1124 stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14);
1125 if (st->stream_channels == 2)
1126 stereo_threshold -= 1000;
1127 else
1128 stereo_threshold += 1000;
1129 st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1;
1130 } else {
1131 st->stream_channels = st->channels;
1132 }
1133 #endif
1134 }
1135 equiv_rate = st->bitrate_bps - (40*st->stream_channels+20)*(st->Fs/frame_size - 50);
1136
1137 /* Mode selection depending on application and signal type */
1138 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
1139 {
1140 st->mode = MODE_CELT_ONLY;
1141 } else if (st->user_forced_mode == OPUS_AUTO)
1142 {
1143 #ifdef FUZZING
1144 /* Random mode switching */
1145 if ((rand()&0xF)==0)
1146 {
1147 if ((rand()&0x1)==0)
1148 st->mode = MODE_CELT_ONLY;
1149 else
1150 st->mode = MODE_SILK_ONLY;
1151 } else {
1152 if (st->prev_mode==MODE_CELT_ONLY)
1153 st->mode = MODE_CELT_ONLY;
1154 else
1155 st->mode = MODE_SILK_ONLY;
1156 }
1157 #else
1158 opus_int32 mode_voice, mode_music;
1159 opus_int32 threshold;
1160
1161 /* Interpolate based on stereo width */
1162 mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0])
1163 + MULT16_32_Q15(stereo_width,mode_thresholds[1][0]));
1164 mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[1][1])
1165 + MULT16_32_Q15(stereo_width,mode_thresholds[1][1]));
1166 /* Interpolate based on speech/music probability */
1167 threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
1168 /* Bias towards SILK for VoIP because of some useful features */
1169 if (st->application == OPUS_APPLICATION_VOIP)
1170 threshold += 8000;
1171
1172 /*printf("%f %d\n", stereo_width/(float)Q15ONE, threshold);*/
1173 /* Hysteresis */
1174 if (st->prev_mode == MODE_CELT_ONLY)
1175 threshold -= 4000;
1176 else if (st->prev_mode>0)
1177 threshold += 4000;
1178
1179 st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;
1180
1181 /* When FEC is enabled and there's enough packet loss, use SILK */
1182 if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > (128-voice_est)>>4)
1183 st->mode = MODE_SILK_ONLY;
1184 /* When encoding voice and DTX is enabled, set the encoder to SILK mode (at least for now) */
1185 if (st->silk_mode.useDTX && voice_est > 100)
1186 st->mode = MODE_SILK_ONLY;
1187 #endif
1188 } else {
1189 st->mode = st->user_forced_mode;
1190 }
1191
1192 /* Override the chosen mode to make sure we meet the requested frame size */
1193 if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100)
1194 st->mode = MODE_CELT_ONLY;
1195 if (st->lfe)
1196 st->mode = MODE_CELT_ONLY;
1197 /* If max_data_bytes represents less than 8 kb/s, switch to CELT-only mode */
1198 if (max_data_bytes < (frame_rate > 50 ? 12000 : 8000)*frame_size / (st->Fs * 8))
1199 st->mode = MODE_CELT_ONLY;
1200
1201 if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0
1202 && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)
1203 {
1204 /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */
1205 st->silk_mode.toMono = 1;
1206 st->stream_channels = 2;
1207 } else {
1208 st->silk_mode.toMono = 0;
1209 }
1210
1211 if (st->prev_mode > 0 &&
1212 ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ||
1213 (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)))
1214 {
1215 redundancy = 1;
1216 celt_to_silk = (st->mode != MODE_CELT_ONLY);
1217 if (!celt_to_silk)
1218 {
1219 /* Switch to SILK/hybrid if frame size is 10 ms or more*/
1220 if (frame_size >= st->Fs/100)
1221 {
1222 st->mode = st->prev_mode;
1223 to_celt = 1;
1224 } else {
1225 redundancy=0;
1226 }
1227 }
1228 }
1229 /* For the first frame at a new SILK bandwidth */
1230 if (st->silk_bw_switch)
1231 {
1232 redundancy = 1;
1233 celt_to_silk = 1;
1234 st->silk_bw_switch = 0;
1235 prefill=1;
1236 }
1237
1238 if (redundancy)
1239 {
1240 /* Fair share of the max size allowed */
1241 redundancy_bytes = IMIN(257, max_data_bytes*(opus_int32)(st->Fs/200)/(frame_size+st->Fs/200));
1242 /* For VBR, target the actual bitrate (subject to the limit above) */
1243 if (st->use_vbr)
1244 redundancy_bytes = IMIN(redundancy_bytes, st->bitrate_bps/1600);
1245 }
1246
1247 if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY)
1248 {
1249 silk_EncControlStruct dummy;
1250 silk_InitEncoder( silk_enc, st->arch, &dummy);
1251 prefill=1;
1252 }
1253
1254 /* Automatic (rate-dependent) bandwidth selection */
1255 if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
1256 {
1257 const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds;
1258 opus_int32 bandwidth_thresholds[8];
1259 int bandwidth = OPUS_BANDWIDTH_FULLBAND;
1260 opus_int32 equiv_rate2;
1261
1262 equiv_rate2 = equiv_rate;
1263 if (st->mode != MODE_CELT_ONLY)
1264 {
1265 /* Adjust the threshold +/- 10% depending on complexity */
1266 equiv_rate2 = equiv_rate2 * (45+st->silk_mode.complexity)/50;
1267 /* CBR is less efficient by ~1 kb/s */
1268 if (!st->use_vbr)
1269 equiv_rate2 -= 1000;
1270 }
1271 if (st->channels==2 && st->force_channels!=1)
1272 {
1273 voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds;
1274 music_bandwidth_thresholds = stereo_music_bandwidth_thresholds;
1275 } else {
1276 voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds;
1277 music_bandwidth_thresholds = mono_music_bandwidth_thresholds;
1278 }
1279 /* Interpolate bandwidth thresholds depending on voice estimation */
1280 for (i=0;i<8;i++)
1281 {
1282 bandwidth_thresholds[i] = music_bandwidth_thresholds[i]
1283 + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14);
1284 }
1285 do {
1286 int threshold, hysteresis;
1287 threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)];
1288 hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1];
1289 if (!st->first)
1290 {
1291 if (st->bandwidth >= bandwidth)
1292 threshold -= hysteresis;
1293 else
1294 threshold += hysteresis;
1295 }
1296 if (equiv_rate2 >= threshold)
1297 break;
1298 } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND);
1299 st->bandwidth = bandwidth;
1300 /* Prevents any transition to SWB/FB until the SILK layer has fully
1301 switched to WB mode and turned the variable LP filter off */
1302 if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
1303 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
1304 }
1305
1306 if (st->bandwidth>st->max_bandwidth)
1307 st->bandwidth = st->max_bandwidth;
1308
1309 if (st->user_bandwidth != OPUS_AUTO)
1310 st->bandwidth = st->user_bandwidth;
1311
1312 /* This prevents us from using hybrid at unsafe CBR/max rates */
1313 if (st->mode != MODE_CELT_ONLY && max_rate < 15000)
1314 {
1315 st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND);
1316 }
1317
1318 /* Prevents Opus from wasting bits on frequencies that are above
1319 the Nyquist rate of the input signal */
1320 if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND)
1321 st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
1322 if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
1323 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
1324 if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND)
1325 st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
1326 if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND)
1327 st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
1328 #ifndef DISABLE_FLOAT_API
1329 /* Use detected bandwidth to reduce the encoded bandwidth. */
1330 if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO)
1331 {
1332 int min_detected_bandwidth;
1333 /* Makes bandwidth detection more conservative just in case the detector
1334 gets it wrong when we could have coded a high bandwidth transparently.
1335 When operating in SILK/hybrid mode, we don't go below wideband to avoid
1336 more complicated switches that require redundancy. */
1337 if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY)
1338 min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
1339 else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY)
1340 min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
1341 else if (equiv_rate <= 30000*st->stream_channels)
1342 min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
1343 else if (equiv_rate <= 44000*st->stream_channels)
1344 min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
1345 else
1346 min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;
1347
1348 st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth);
1349 st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth);
1350 }
1351 #endif
1352 celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth));
1353
1354 /* CELT mode doesn't support mediumband, use wideband instead */
1355 if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
1356 st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
1357 if (st->lfe)
1358 st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
1359
1360 /* Can't support higher than wideband for >20 ms frames */
1361 if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND))
1362 {
1363 VARDECL(unsigned char, tmp_data);
1364 int nb_frames;
1365 int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;
1366 VARDECL(OpusRepacketizer, rp);
1367 opus_int32 bytes_per_frame;
1368 opus_int32 repacketize_len;
1369
1370 #ifndef DISABLE_FLOAT_API
1371 if (analysis_read_pos_bak!= -1)
1372 {
1373 st->analysis.read_pos = analysis_read_pos_bak;
1374 st->analysis.read_subframe = analysis_read_subframe_bak;
1375 }
1376 #endif
1377
1378 nb_frames = frame_size > st->Fs/25 ? 3 : 2;
1379 bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames);
1380
1381 ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char);
1382
1383 ALLOC(rp, 1, OpusRepacketizer);
1384 opus_repacketizer_init(rp);
1385
1386 bak_mode = st->user_forced_mode;
1387 bak_bandwidth = st->user_bandwidth;
1388 bak_channels = st->force_channels;
1389
1390 st->user_forced_mode = st->mode;
1391 st->user_bandwidth = st->bandwidth;
1392 st->force_channels = st->stream_channels;
1393 bak_to_mono = st->silk_mode.toMono;
1394
1395 if (bak_to_mono)
1396 st->force_channels = 1;
1397 else
1398 st->prev_channels = st->stream_channels;
1399 for (i=0;i<nb_frames;i++)
1400 {
1401 int tmp_len;
1402 st->silk_mode.toMono = 0;
1403 /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
1404 if (to_celt && i==nb_frames-1)
1405 st->user_forced_mode = MODE_CELT_ONLY;
1406 tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50,
1407 tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth,
1408 NULL, 0, c1, c2, analysis_channels, downmix, float_api);
1409 if (tmp_len<0)
1410 {
1411 RESTORE_STACK;
1412 return OPUS_INTERNAL_ERROR;
1413 }
1414 ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len);
1415 if (ret<0)
1416 {
1417 RESTORE_STACK;
1418 return OPUS_INTERNAL_ERROR;
1419 }
1420 }
1421 if (st->use_vbr)
1422 repacketize_len = out_data_bytes;
1423 else
1424 repacketize_len = IMIN(3*st->bitrate_bps/(3*8*50/nb_frames), out_data_bytes);
1425 ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr);
1426 if (ret<0)
1427 {
1428 RESTORE_STACK;
1429 return OPUS_INTERNAL_ERROR;
1430 }
1431 st->user_forced_mode = bak_mode;
1432 st->user_bandwidth = bak_bandwidth;
1433 st->force_channels = bak_channels;
1434 st->silk_mode.toMono = bak_to_mono;
1435 RESTORE_STACK;
1436 return ret;
1437 }
1438 curr_bandwidth = st->bandwidth;
1439
1440 /* Chooses the appropriate mode for speech
1441 *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */
1442 if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND)
1443 st->mode = MODE_HYBRID;
1444 if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
1445 st->mode = MODE_SILK_ONLY;
1446
1447 /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */
1448 bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1;
1449
1450 data += 1;
1451
1452 ec_enc_init(&enc, data, max_data_bytes-1);
1453
1454 ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
1455 OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels);
1456
1457 if (st->mode == MODE_CELT_ONLY)
1458 hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
1459 else
1460 hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15;
1461
1462 st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15,
1463 hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) );
1464
1465 /* convert from log scale to Hertz */
1466 cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) );
1467
1468 if (st->application == OPUS_APPLICATION_VOIP)
1469 {
1470 hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
1471 } else {
1472 dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
1473 }
1474 #ifndef FIXED_POINT
1475 if (float_api)
1476 {
1477 opus_val32 sum;
1478 sum = celt_inner_prod(&pcm_buf[total_buffer*st->channels], &pcm_buf[total_buffer*st->channels], frame_size*st->channels, st->arch);
1479 /* This should filter out both NaNs and ridiculous signals that could
1480 cause NaNs further down. */
1481 if (!(sum < 1e9f) || celt_isnan(sum))
1482 {
1483 OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels);
1484 st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0;
1485 }
1486 }
1487 #endif
1488
1489
1490 /* SILK processing */
1491 HB_gain = Q15ONE;
1492 if (st->mode != MODE_CELT_ONLY)
1493 {
1494 opus_int32 total_bitRate, celt_rate;
1495 #ifdef FIXED_POINT
1496 const opus_int16 *pcm_silk;
1497 #else
1498 VARDECL(opus_int16, pcm_silk);
1499 ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
1500 #endif
1501
1502 /* Distribute bits between SILK and CELT */
1503 total_bitRate = 8 * bytes_target * frame_rate;
1504 if( st->mode == MODE_HYBRID ) {
1505 int HB_gain_ref;
1506 /* Base rate for SILK */
1507 st->silk_mode.bitRate = st->stream_channels * ( 5000 + 1000 * ( st->Fs == 100 * frame_size ) );
1508 if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) {
1509 /* SILK gets 2/3 of the remaining bits */
1510 st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 2 / 3;
1511 } else { /* FULLBAND */
1512 /* SILK gets 3/5 of the remaining bits */
1513 st->silk_mode.bitRate += ( total_bitRate - st->silk_mode.bitRate ) * 3 / 5;
1514 }
1515 /* Don't let SILK use more than 80% */
1516 if( st->silk_mode.bitRate > total_bitRate * 4/5 ) {
1517 st->silk_mode.bitRate = total_bitRate * 4/5;
1518 }
1519 if (!st->energy_masking)
1520 {
1521 /* Increasingly attenuate high band when it gets allocated fewer bits */
1522 celt_rate = total_bitRate - st->silk_mode.bitRate;
1523 HB_gain_ref = (curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND) ? 3000 : 3600;
1524 HB_gain = SHL32((opus_val32)celt_rate, 9) / SHR32((opus_val32)celt_rate + st->stream_channels * HB_gain_ref, 6);
1525 HB_gain = HB_gain < (opus_val32)Q15ONE*6/7 ? HB_gain + Q15ONE/7 : Q15ONE;
1526 }
1527 } else {
1528 /* SILK gets all bits */
1529 st->silk_mode.bitRate = total_bitRate;
1530 }
1531
1532 /* Surround masking for SILK */
1533 if (st->energy_masking && st->use_vbr && !st->lfe)
1534 {
1535 opus_val32 mask_sum=0;
1536 opus_val16 masking_depth;
1537 opus_int32 rate_offset;
1538 int c;
1539 int end = 17;
1540 opus_int16 srate = 16000;
1541 if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND)
1542 {
1543 end = 13;
1544 srate = 8000;
1545 } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
1546 {
1547 end = 15;
1548 srate = 12000;
1549 }
1550 for (c=0;c<st->channels;c++)
1551 {
1552 for(i=0;i<end;i++)
1553 {
1554 opus_val16 mask;
1555 mask = MAX16(MIN16(st->energy_masking[21*c+i],
1556 QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
1557 if (mask > 0)
1558 mask = HALF16(mask);
1559 mask_sum += mask;
1560 }
1561 }
1562 /* Conservative rate reduction, we cut the masking in half */
1563 masking_depth = mask_sum / end*st->channels;
1564 masking_depth += QCONST16(.2f, DB_SHIFT);
1565 rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT);
1566 rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3);
1567 /* Split the rate change between the SILK and CELT part for hybrid. */
1568 if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND)
1569 st->silk_mode.bitRate += 3*rate_offset/5;
1570 else
1571 st->silk_mode.bitRate += rate_offset;
1572 bytes_target += rate_offset * frame_size / (8 * st->Fs);
1573 }
1574
1575 st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;
1576 st->silk_mode.nChannelsAPI = st->channels;
1577 st->silk_mode.nChannelsInternal = st->stream_channels;
1578 if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
1579 st->silk_mode.desiredInternalSampleRate = 8000;
1580 } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
1581 st->silk_mode.desiredInternalSampleRate = 12000;
1582 } else {
1583 silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND );
1584 st->silk_mode.desiredInternalSampleRate = 16000;
1585 }
1586 if( st->mode == MODE_HYBRID ) {
1587 /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */
1588 st->silk_mode.minInternalSampleRate = 16000;
1589 } else {
1590 st->silk_mode.minInternalSampleRate = 8000;
1591 }
1592
1593 if (st->mode == MODE_SILK_ONLY)
1594 {
1595 opus_int32 effective_max_rate = max_rate;
1596 st->silk_mode.maxInternalSampleRate = 16000;
1597 if (frame_rate > 50)
1598 effective_max_rate = effective_max_rate*2/3;
1599 if (effective_max_rate < 13000)
1600 {
1601 st->silk_mode.maxInternalSampleRate = 12000;
1602 st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate);
1603 }
1604 if (effective_max_rate < 9600)
1605 {
1606 st->silk_mode.maxInternalSampleRate = 8000;
1607 st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate);
1608 }
1609 } else {
1610 st->silk_mode.maxInternalSampleRate = 16000;
1611 }
1612
1613 st->silk_mode.useCBR = !st->use_vbr;
1614
1615 /* Call SILK encoder for the low band */
1616 nBytes = IMIN(1275, max_data_bytes-1-redundancy_bytes);
1617
1618 st->silk_mode.maxBits = nBytes*8;
1619 /* Only allow up to 90% of the bits for hybrid mode*/
1620 if (st->mode == MODE_HYBRID)
1621 st->silk_mode.maxBits = (opus_int32)st->silk_mode.maxBits*9/10;
1622 if (st->silk_mode.useCBR)
1623 {
1624 st->silk_mode.maxBits = (st->silk_mode.bitRate * frame_size / (st->Fs * 8))*8;
1625 /* Reduce the initial target to make it easier to reach the CBR rate */
1626 st->silk_mode.bitRate = IMAX(1, st->silk_mode.bitRate-2000);
1627 }
1628
1629 if (prefill)
1630 {
1631 opus_int32 zero=0;
1632 int prefill_offset;
1633 /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode
1634 a discontinuity. The exact location is what we need to avoid leaving any "gap"
1635 in the audio when mixing with the redundant CELT frame. Here we can afford to
1636 overwrite st->delay_buffer because the only thing that uses it before it gets
1637 rewritten is tmp_prefill[] and even then only the part after the ramp really
1638 gets used (rather than sent to the encoder and discarded) */
1639 prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400);
1640 gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset,
1641 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs);
1642 OPUS_CLEAR(st->delay_buffer, prefill_offset);
1643 #ifdef FIXED_POINT
1644 pcm_silk = st->delay_buffer;
1645 #else
1646 for (i=0;i<st->encoder_buffer*st->channels;i++)
1647 pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]);
1648 #endif
1649 silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, 1 );
1650 }
1651
1652 #ifdef FIXED_POINT
1653 pcm_silk = pcm_buf+total_buffer*st->channels;
1654 #else
1655 for (i=0;i<frame_size*st->channels;i++)
1656 pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]);
1657 #endif
1658 ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 );
1659 if( ret ) {
1660 /*fprintf (stderr, "SILK encode error: %d\n", ret);*/
1661 /* Handle error */
1662 RESTORE_STACK;
1663 return OPUS_INTERNAL_ERROR;
1664 }
1665 if (nBytes==0)
1666 {
1667 st->rangeFinal = 0;
1668 data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
1669 RESTORE_STACK;
1670 return 1;
1671 }
1672 /* Extract SILK internal bandwidth for signaling in first byte */
1673 if( st->mode == MODE_SILK_ONLY ) {
1674 if( st->silk_mode.internalSampleRate == 8000 ) {
1675 curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
1676 } else if( st->silk_mode.internalSampleRate == 12000 ) {
1677 curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
1678 } else if( st->silk_mode.internalSampleRate == 16000 ) {
1679 curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
1680 }
1681 } else {
1682 silk_assert( st->silk_mode.internalSampleRate == 16000 );
1683 }
1684
1685 st->silk_mode.opusCanSwitch = st->silk_mode.switchReady;
1686 /* FIXME: How do we allocate the redundancy for CBR? */
1687 if (st->silk_mode.opusCanSwitch)
1688 {
1689 redundancy = 1;
1690 celt_to_silk = 0;
1691 st->silk_bw_switch = 1;
1692 }
1693 }
1694
1695 /* CELT processing */
1696 {
1697 int endband=21;
1698
1699 switch(curr_bandwidth)
1700 {
1701 case OPUS_BANDWIDTH_NARROWBAND:
1702 endband = 13;
1703 break;
1704 case OPUS_BANDWIDTH_MEDIUMBAND:
1705 case OPUS_BANDWIDTH_WIDEBAND:
1706 endband = 17;
1707 break;
1708 case OPUS_BANDWIDTH_SUPERWIDEBAND:
1709 endband = 19;
1710 break;
1711 case OPUS_BANDWIDTH_FULLBAND:
1712 endband = 21;
1713 break;
1714 }
1715 celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband));
1716 celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels));
1717 }
1718 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX));
1719 if (st->mode != MODE_SILK_ONLY)
1720 {
1721 opus_val32 celt_pred=2;
1722 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
1723 /* We may still decide to disable prediction later */
1724 if (st->silk_mode.reducedDependency)
1725 celt_pred = 0;
1726 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred));
1727
1728 if (st->mode == MODE_HYBRID)
1729 {
1730 int len;
1731
1732 len = (ec_tell(&enc)+7)>>3;
1733 if (redundancy)
1734 len += st->mode == MODE_HYBRID ? 3 : 1;
1735 if( st->use_vbr ) {
1736 nb_compr_bytes = len + bytes_target - (st->silk_mode.bitRate * frame_size) / (8 * st->Fs);
1737 } else {
1738 /* check if SILK used up too much */
1739 nb_compr_bytes = len > bytes_target ? len : bytes_target;
1740 }
1741 } else {
1742 if (st->use_vbr)
1743 {
1744 opus_int32 bonus=0;
1745 #ifndef DISABLE_FLOAT_API
1746 if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50)
1747 {
1748 bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50);
1749 if (analysis_info.valid)
1750 bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality));
1751 }
1752 #endif
1753 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
1754 celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
1755 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps+bonus));
1756 nb_compr_bytes = max_data_bytes-1-redundancy_bytes;
1757 } else {
1758 nb_compr_bytes = bytes_target;
1759 }
1760 }
1761
1762 } else {
1763 nb_compr_bytes = 0;
1764 }
1765
1766 ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
1767 if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
1768 {
1769 OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400);
1770 }
1771
1772 if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0)
1773 {
1774 OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer));
1775 OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)],
1776 &pcm_buf[0],
1777 (frame_size+total_buffer)*st->channels);
1778 } else {
1779 OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels);
1780 }
1781 /* gain_fade() and stereo_fade() need to be after the buffer copying
1782 because we don't want any of this to affect the SILK part */
1783 if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {
1784 gain_fade(pcm_buf, pcm_buf,
1785 st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
1786 }
1787 st->prev_HB_gain = HB_gain;
1788 if (st->mode != MODE_HYBRID || st->stream_channels==1)
1789 st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),2*IMAX(0,equiv_rate-30000));
1790 if( !st->energy_masking && st->channels == 2 ) {
1791 /* Apply stereo width reduction (at low bitrates) */
1792 if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
1793 opus_val16 g1, g2;
1794 g1 = st->hybrid_stereo_width_Q14;
1795 g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14);
1796 #ifdef FIXED_POINT
1797 g1 = g1==16384 ? Q15ONE : SHL16(g1,1);
1798 g2 = g2==16384 ? Q15ONE : SHL16(g2,1);
1799 #else
1800 g1 *= (1.f/16384);
1801 g2 *= (1.f/16384);
1802 #endif
1803 stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
1804 frame_size, st->channels, celt_mode->window, st->Fs);
1805 st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
1806 }
1807 }
1808
1809 if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1))
1810 {
1811 /* For SILK mode, the redundancy is inferred from the length */
1812 if (st->mode == MODE_HYBRID && (redundancy || ec_tell(&enc)+37 <= 8*nb_compr_bytes))
1813 ec_enc_bit_logp(&enc, redundancy, 12);
1814 if (redundancy)
1815 {
1816 int max_redundancy;
1817 ec_enc_bit_logp(&enc, celt_to_silk, 1);
1818 if (st->mode == MODE_HYBRID)
1819 max_redundancy = (max_data_bytes-1)-nb_compr_bytes;
1820 else
1821 max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3);
1822 /* Target the same bit-rate for redundancy as for the rest,
1823 up to a max of 257 bytes */
1824 redundancy_bytes = IMIN(max_redundancy, st->bitrate_bps/1600);
1825 redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes));
1826 if (st->mode == MODE_HYBRID)
1827 ec_enc_uint(&enc, redundancy_bytes-2, 256);
1828 }
1829 } else {
1830 redundancy = 0;
1831 }
1832
1833 if (!redundancy)
1834 {
1835 st->silk_bw_switch = 0;
1836 redundancy_bytes = 0;
1837 }
1838 if (st->mode != MODE_CELT_ONLY)start_band=17;
1839
1840 if (st->mode == MODE_SILK_ONLY)
1841 {
1842 ret = (ec_tell(&enc)+7)>>3;
1843 ec_enc_done(&enc);
1844 nb_compr_bytes = ret;
1845 } else {
1846 nb_compr_bytes = IMIN((max_data_bytes-1)-redundancy_bytes, nb_compr_bytes);
1847 ec_enc_shrink(&enc, nb_compr_bytes);
1848 }
1849
1850 #ifndef DISABLE_FLOAT_API
1851 if (redundancy || st->mode != MODE_SILK_ONLY)
1852 celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
1853 #endif
1854
1855 /* 5 ms redundant frame for CELT->SILK */
1856 if (redundancy && celt_to_silk)
1857 {
1858 int err;
1859 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
1860 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
1861 err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
1862 if (err < 0)
1863 {
1864 RESTORE_STACK;
1865 return OPUS_INTERNAL_ERROR;
1866 }
1867 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
1868 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
1869 }
1870
1871 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band));
1872
1873 if (st->mode != MODE_SILK_ONLY)
1874 {
1875 if (st->mode != st->prev_mode && st->prev_mode > 0)
1876 {
1877 unsigned char dummy[2];
1878 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
1879
1880 /* Prefilling */
1881 celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL);
1882 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
1883 }
1884 /* If false, we already busted the budget and we'll end up with a "PLC packet" */
1885 if (ec_tell(&enc) <= 8*nb_compr_bytes)
1886 {
1887 ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
1888 if (ret < 0)
1889 {
1890 RESTORE_STACK;
1891 return OPUS_INTERNAL_ERROR;
1892 }
1893 }
1894 }
1895
1896 /* 5 ms redundant frame for SILK->CELT */
1897 if (redundancy && !celt_to_silk)
1898 {
1899 int err;
1900 unsigned char dummy[2];
1901 int N2, N4;
1902 N2 = st->Fs/200;
1903 N4 = st->Fs/400;
1904
1905 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
1906 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
1907 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
1908
1909 /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
1910 celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);
1911
1912 err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
1913 if (err < 0)
1914 {
1915 RESTORE_STACK;
1916 return OPUS_INTERNAL_ERROR;
1917 }
1918 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
1919 }
1920
1921
1922
1923 /* Signalling the mode in the first byte */
1924 data--;
1925 data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
1926
1927 st->rangeFinal = enc.rng ^ redundant_rng;
1928
1929 if (to_celt)
1930 st->prev_mode = MODE_CELT_ONLY;
1931 else
1932 st->prev_mode = st->mode;
1933 st->prev_channels = st->stream_channels;
1934 st->prev_framesize = frame_size;
1935
1936 st->first = 0;
1937
1938 /* In the unlikely case that the SILK encoder busted its target, tell
1939 the decoder to call the PLC */
1940 if (ec_tell(&enc) > (max_data_bytes-1)*8)
1941 {
1942 if (max_data_bytes < 2)
1943 {
1944 RESTORE_STACK;
1945 return OPUS_BUFFER_TOO_SMALL;
1946 }
1947 data[1] = 0;
1948 ret = 1;
1949 st->rangeFinal = 0;
1950 } else if (st->mode==MODE_SILK_ONLY&&!redundancy)
1951 {
1952 /*When in LPC only mode it's perfectly
1953 reasonable to strip off trailing zero bytes as
1954 the required range decoder behavior is to
1955 fill these in. This can't be done when the MDCT
1956 modes are used because the decoder needs to know
1957 the actual length for allocation purposes.*/
1958 while(ret>2&&data[ret]==0)ret--;
1959 }
1960 /* Count ToC and redundancy */
1961 ret += 1+redundancy_bytes;
1962 if (!st->use_vbr)
1963 {
1964 if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK)
1965
1966 {
1967 RESTORE_STACK;
1968 return OPUS_INTERNAL_ERROR;
1969 }
1970 ret = max_data_bytes;
1971 }
1972 RESTORE_STACK;
1973 return ret;
1974 }
1975
1976 #ifdef FIXED_POINT
1977
1978 #ifndef DISABLE_FLOAT_API
/* Floating-point entry point of the fixed-point build.
 *
 * Converts frame_size*st->channels input samples to opus_int16 with
 * FLOAT2INT16() into a temporary stack buffer and passes that buffer to
 * opus_encode_native(). The caller's float buffer is still forwarded,
 * together with downmix_float, as the analysis input.
 *
 *   st                  - encoder state
 *   pcm                 - float input samples
 *   analysis_frame_size - frame size supplied by the caller; the size that is
 *                         actually encoded is chosen by compute_frame_size()
 *   data                - output buffer for the encoded packet
 *   max_data_bytes      - capacity of data, forwarded unchanged
 *
 * Returns whatever opus_encode_native() returns, or OPUS_BAD_ARG when the
 * selected frame size is not positive.
 */
opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 max_data_bytes)
{
   int i, ret;
   int frame_size;
   int delay_compensation;
   VARDECL(opus_int16, in);
   ALLOC_STACK;

   /* The restricted low-delay application runs without delay compensation */
   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
      delay_compensation = 0;
   else
      delay_compensation = st->delay_compensation;
   frame_size = compute_frame_size(pcm, analysis_frame_size,
         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
         delay_compensation, downmix_float, st->analysis.subframe_mem);

   /* compute_frame_size() is defined elsewhere; presumably it reports an
      unsupported size with a non-positive result (TODO confirm). Such a value
      must never be used as an allocation size or loop bound. */
   if (frame_size <= 0)
   {
      RESTORE_STACK;
      return OPUS_BAD_ARG;
   }

   ALLOC(in, frame_size*st->channels, opus_int16);

   for (i=0;i<frame_size*st->channels;i++)
      in[i] = FLOAT2INT16(pcm[i]);
   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
         pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1);
   RESTORE_STACK;
   return ret;
}
2005 #endif
2006
/* 16-bit integer entry point of the fixed-point build.
 *
 * No sample conversion is needed here: the caller's buffer is handed to
 * opus_encode_native() both as the signal to encode and as the analysis
 * input (with downmix_int). The frame size that is encoded is the one
 * selected by compute_frame_size().
 *
 * Returns the result of opus_encode_native().
 */
opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 out_data_bytes)
{
   /* Delay compensation is dropped for the restricted low-delay application */
   const int delay_compensation =
      (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
         ? 0
         : st->delay_compensation;
   /* The subframe memory argument only exists when the float API is built */
   const int frame_size = compute_frame_size(pcm, analysis_frame_size,
         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
         delay_compensation, downmix_int
#ifndef DISABLE_FLOAT_API
         , st->analysis.subframe_mem
#endif
         );

   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16,
         pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
}
2026
2027 #else
/* 16-bit integer entry point of the floating-point build.
 *
 * Scales frame_size*st->channels input samples by 1/32768 into a temporary
 * float stack buffer and passes that buffer to opus_encode_native(). The
 * caller's integer buffer is still forwarded, together with downmix_int, as
 * the analysis input.
 *
 *   st                  - encoder state
 *   pcm                 - 16-bit input samples
 *   analysis_frame_size - frame size supplied by the caller; the size that is
 *                         actually encoded is chosen by compute_frame_size()
 *   data                - output buffer for the encoded packet
 *   max_data_bytes      - capacity of data, forwarded unchanged
 *
 * Returns whatever opus_encode_native() returns, or OPUS_BAD_ARG when the
 * selected frame size is not positive.
 */
opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 max_data_bytes)
{
   int i, ret;
   int frame_size;
   int delay_compensation;
   VARDECL(float, in);
   ALLOC_STACK;

   /* The restricted low-delay application runs without delay compensation */
   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
      delay_compensation = 0;
   else
      delay_compensation = st->delay_compensation;
   frame_size = compute_frame_size(pcm, analysis_frame_size,
         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
         delay_compensation, downmix_int, st->analysis.subframe_mem);

   /* compute_frame_size() is defined elsewhere; presumably it reports an
      unsupported size with a non-positive result (TODO confirm). Such a value
      must never be used as an allocation size or loop bound. */
   if (frame_size <= 0)
   {
      RESTORE_STACK;
      return OPUS_BAD_ARG;
   }

   ALLOC(in, frame_size*st->channels, float);

   for (i=0;i<frame_size*st->channels;i++)
      in[i] = (1.0f/32768)*pcm[i];
   ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16,
         pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
   RESTORE_STACK;
   return ret;
}
/* Floating-point entry point of the floating-point build.
 *
 * The caller's float buffer is used directly, both as the signal handed to
 * opus_encode_native() and as the analysis input (with downmix_float). The
 * frame size that is encoded is the one selected by compute_frame_size().
 *
 * Returns the result of opus_encode_native().
 */
opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 out_data_bytes)
{
   /* Delay compensation is dropped for the restricted low-delay application */
   const int delay_compensation =
      (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
         ? 0
         : st->delay_compensation;
   const int frame_size = compute_frame_size(pcm, analysis_frame_size,
         st->variable_duration, st->channels, st->Fs, st->bitrate_bps,
         delay_compensation, downmix_float, st->analysis.subframe_mem);

   return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24,
         pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1);
}
2069 #endif
2070
2071
/* Variadic control interface of the encoder.
 *
 * `request` selects the operation and the single variadic argument carries
 * either the new value (SET requests) or a pointer that receives the current
 * value (GET requests). Several requests are mirrored into the embedded CELT
 * encoder, which lives st->celt_enc_offset bytes into the state.
 *
 * Returns OPUS_OK on success, OPUS_BAD_ARG for an out-of-range value or a
 * null output pointer, OPUS_UNIMPLEMENTED for an unknown request, or the
 * result of celt_encoder_ctl() for the requests whose status is forwarded
 * (LFE, energy mask, CELT mode query).
 */
int opus_encoder_ctl(OpusEncoder *st, int request, ...)
{
    va_list args;
    CELTEncoder *celt;
    int status = OPUS_OK;

    va_start(args, request);
    celt = (CELTEncoder*)((char*)st + st->celt_enc_offset);

    switch (request)
    {
        case OPUS_SET_APPLICATION_REQUEST:
        {
            opus_int32 app = va_arg(args, opus_int32);
            int known_app = app == OPUS_APPLICATION_VOIP
                         || app == OPUS_APPLICATION_AUDIO
                         || app == OPUS_APPLICATION_RESTRICTED_LOWDELAY;
            /* Once st->first has been cleared, only the application that is
               already selected is accepted */
            if (!known_app || (!st->first && st->application != app))
                goto bad_arg;
            st->application = app;
            break;
        }
        case OPUS_GET_APPLICATION_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->application;
            break;
        }
        case OPUS_SET_BITRATE_REQUEST:
        {
            opus_int32 rate = va_arg(args, opus_int32);
            /* OPUS_AUTO and OPUS_BITRATE_MAX are stored untouched; explicit
               rates are clamped to [500, 300000 per channel] */
            if (rate != OPUS_AUTO && rate != OPUS_BITRATE_MAX)
            {
                const opus_int32 ceiling = (opus_int32)300000*st->channels;
                if (rate <= 0)
                    goto bad_arg;
                if (rate <= 500)
                    rate = 500;
                else if (rate > ceiling)
                    rate = ceiling;
            }
            st->user_bitrate_bps = rate;
            break;
        }
        case OPUS_GET_BITRATE_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = user_bitrate_to_bitrate(st, st->prev_framesize, 1276);
            break;
        }
        case OPUS_SET_FORCE_CHANNELS_REQUEST:
        {
            opus_int32 nb_channels = va_arg(args, opus_int32);
            if (nb_channels != OPUS_AUTO && (nb_channels < 1 || nb_channels > st->channels))
                goto bad_arg;
            st->force_channels = nb_channels;
            break;
        }
        case OPUS_GET_FORCE_CHANNELS_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->force_channels;
            break;
        }
        case OPUS_SET_MAX_BANDWIDTH_REQUEST:
        {
            opus_int32 bw = va_arg(args, opus_int32);
            if (bw < OPUS_BANDWIDTH_NARROWBAND || bw > OPUS_BANDWIDTH_FULLBAND)
                goto bad_arg;
            st->max_bandwidth = bw;
            /* Narrowband and mediumband cap the SILK internal rate at 8 and
               12 kHz; every other accepted value selects 16 kHz */
            st->silk_mode.maxInternalSampleRate =
                (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) ? 8000 :
                (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) ? 12000 : 16000;
            break;
        }
        case OPUS_GET_MAX_BANDWIDTH_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->max_bandwidth;
            break;
        }
        case OPUS_SET_BANDWIDTH_REQUEST:
        {
            opus_int32 bw = va_arg(args, opus_int32);
            if (bw != OPUS_AUTO && (bw < OPUS_BANDWIDTH_NARROWBAND || bw > OPUS_BANDWIDTH_FULLBAND))
                goto bad_arg;
            st->user_bandwidth = bw;
            /* Same mapping as for the maximum bandwidth; every other accepted
               value selects 16 kHz */
            st->silk_mode.maxInternalSampleRate =
                (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) ? 8000 :
                (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) ? 12000 : 16000;
            break;
        }
        case OPUS_GET_BANDWIDTH_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->bandwidth;
            break;
        }
        case OPUS_SET_DTX_REQUEST:
        {
            opus_int32 enable = va_arg(args, opus_int32);
            if (enable < 0 || enable > 1)
                goto bad_arg;
            st->silk_mode.useDTX = enable;
            break;
        }
        case OPUS_GET_DTX_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->silk_mode.useDTX;
            break;
        }
        case OPUS_SET_COMPLEXITY_REQUEST:
        {
            opus_int32 level = va_arg(args, opus_int32);
            if (level < 0 || level > 10)
                goto bad_arg;
            /* Applied to both the SILK settings and the CELT encoder */
            st->silk_mode.complexity = level;
            celt_encoder_ctl(celt, OPUS_SET_COMPLEXITY(level));
            break;
        }
        case OPUS_GET_COMPLEXITY_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->silk_mode.complexity;
            break;
        }
        case OPUS_SET_INBAND_FEC_REQUEST:
        {
            opus_int32 enable = va_arg(args, opus_int32);
            if (enable < 0 || enable > 1)
                goto bad_arg;
            st->silk_mode.useInBandFEC = enable;
            break;
        }
        case OPUS_GET_INBAND_FEC_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->silk_mode.useInBandFEC;
            break;
        }
        case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
        {
            opus_int32 percent = va_arg(args, opus_int32);
            if (percent < 0 || percent > 100)
                goto bad_arg;
            /* Applied to both the SILK settings and the CELT encoder */
            st->silk_mode.packetLossPercentage = percent;
            celt_encoder_ctl(celt, OPUS_SET_PACKET_LOSS_PERC(percent));
            break;
        }
        case OPUS_GET_PACKET_LOSS_PERC_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->silk_mode.packetLossPercentage;
            break;
        }
        case OPUS_SET_VBR_REQUEST:
        {
            opus_int32 enable = va_arg(args, opus_int32);
            if (enable < 0 || enable > 1)
                goto bad_arg;
            st->use_vbr = enable;
            /* The SILK flag is the logical inverse of the VBR switch */
            st->silk_mode.useCBR = 1-enable;
            break;
        }
        case OPUS_GET_VBR_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->use_vbr;
            break;
        }
        case OPUS_SET_VOICE_RATIO_REQUEST:
        {
            opus_int32 ratio = va_arg(args, opus_int32);
            if (ratio < -1 || ratio > 100)
                goto bad_arg;
            st->voice_ratio = ratio;
            break;
        }
        case OPUS_GET_VOICE_RATIO_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->voice_ratio;
            break;
        }
        case OPUS_SET_VBR_CONSTRAINT_REQUEST:
        {
            opus_int32 enable = va_arg(args, opus_int32);
            if (enable < 0 || enable > 1)
                goto bad_arg;
            st->vbr_constraint = enable;
            break;
        }
        case OPUS_GET_VBR_CONSTRAINT_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->vbr_constraint;
            break;
        }
        case OPUS_SET_SIGNAL_REQUEST:
        {
            opus_int32 signal = va_arg(args, opus_int32);
            int known_signal = signal == OPUS_AUTO
                            || signal == OPUS_SIGNAL_VOICE
                            || signal == OPUS_SIGNAL_MUSIC;
            if (!known_signal)
                goto bad_arg;
            st->signal_type = signal;
            break;
        }
        case OPUS_GET_SIGNAL_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->signal_type;
            break;
        }
        case OPUS_GET_LOOKAHEAD_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            opus_int32 lookahead;
            if (!out)
                goto bad_arg;
            /* Fs/400 samples, plus the delay compensation unless the
               restricted low-delay application is selected */
            lookahead = st->Fs/400;
            if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
                lookahead += st->delay_compensation;
            *out = lookahead;
            break;
        }
        case OPUS_GET_SAMPLE_RATE_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->Fs;
            break;
        }
        case OPUS_GET_FINAL_RANGE_REQUEST:
        {
            opus_uint32 *out = va_arg(args, opus_uint32*);
            if (!out)
                goto bad_arg;
            *out = st->rangeFinal;
            break;
        }
        case OPUS_SET_LSB_DEPTH_REQUEST:
        {
            opus_int32 depth = va_arg(args, opus_int32);
            if (depth < 8 || depth > 24)
                goto bad_arg;
            st->lsb_depth = depth;
            break;
        }
        case OPUS_GET_LSB_DEPTH_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->lsb_depth;
            break;
        }
        case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
        {
            opus_int32 duration = va_arg(args, opus_int32);
            int known_duration = duration == OPUS_FRAMESIZE_ARG
                              || duration == OPUS_FRAMESIZE_2_5_MS
                              || duration == OPUS_FRAMESIZE_5_MS
                              || duration == OPUS_FRAMESIZE_10_MS
                              || duration == OPUS_FRAMESIZE_20_MS
                              || duration == OPUS_FRAMESIZE_40_MS
                              || duration == OPUS_FRAMESIZE_60_MS
                              || duration == OPUS_FRAMESIZE_VARIABLE;
            if (!known_duration)
                goto bad_arg;
            /* Stored locally and also passed on to the CELT encoder */
            st->variable_duration = duration;
            celt_encoder_ctl(celt, OPUS_SET_EXPERT_FRAME_DURATION(duration));
            break;
        }
        case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->variable_duration;
            break;
        }
        case OPUS_SET_PREDICTION_DISABLED_REQUEST:
        {
            opus_int32 disabled = va_arg(args, opus_int32);
            if (disabled < 0 || disabled > 1)
                goto bad_arg;
            st->silk_mode.reducedDependency = disabled;
            break;
        }
        case OPUS_GET_PREDICTION_DISABLED_REQUEST:
        {
            opus_int32 *out = va_arg(args, opus_int32*);
            if (!out)
                goto bad_arg;
            *out = st->silk_mode.reducedDependency;
            break;
        }
        case OPUS_RESET_STATE:
        {
            silk_EncControlStruct unused_ctl;
            void *silk_state = (char*)st + st->silk_enc_offset;
            char *reset_begin;
#ifndef DISABLE_FLOAT_API
            tonality_analysis_reset(&st->analysis);
#endif

            /* Zero everything from the OPUS_ENCODER_RESET_START member to the
               end of the OpusEncoder struct; the members before it are kept */
            reset_begin = (char*)&st->OPUS_ENCODER_RESET_START;
            OPUS_CLEAR(reset_begin, sizeof(OpusEncoder) - (reset_begin - (char*)st));

            /* Reset both embedded codecs, then restore the non-zero defaults */
            celt_encoder_ctl(celt, OPUS_RESET_STATE);
            silk_InitEncoder( silk_state, st->arch, &unused_ctl );
            st->stream_channels = st->channels;
            st->hybrid_stereo_width_Q14 = 1 << 14;
            st->prev_HB_gain = Q15ONE;
            st->first = 1;
            st->mode = MODE_HYBRID;
            st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
            st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
            break;
        }
        case OPUS_SET_FORCE_MODE_REQUEST:
        {
            opus_int32 forced = va_arg(args, opus_int32);
            if (forced != OPUS_AUTO && (forced < MODE_SILK_ONLY || forced > MODE_CELT_ONLY))
                goto bad_arg;
            st->user_forced_mode = forced;
            break;
        }
        case OPUS_SET_LFE_REQUEST:
        {
            opus_int32 lfe = va_arg(args, opus_int32);
            /* Stored without validation; the CELT encoder's status is returned */
            st->lfe = lfe;
            status = celt_encoder_ctl(celt, OPUS_SET_LFE(lfe));
            break;
        }
        case OPUS_SET_ENERGY_MASK_REQUEST:
        {
            opus_val16 *mask = va_arg(args, opus_val16*);
            /* Only the pointer is stored; the CELT encoder's status is returned */
            st->energy_masking = mask;
            status = celt_encoder_ctl(celt, OPUS_SET_ENERGY_MASK(mask));
            break;
        }

        case CELT_GET_MODE_REQUEST:
        {
            const CELTMode **mode_out = va_arg(args, const CELTMode**);
            if (!mode_out)
                goto bad_arg;
            status = celt_encoder_ctl(celt, CELT_GET_MODE(mode_out));
            break;
        }
        default:
            /* Request not handled by this encoder */
            status = OPUS_UNIMPLEMENTED;
            break;
    }
    va_end(args);
    return status;

    /* Shared exit for rejected values and null output pointers */
bad_arg:
    va_end(args);
    return OPUS_BAD_ARG;
}
2532
/* Releases an encoder state by handing the pointer to opus_free().
 * NOTE(review): presumably only valid for states obtained from the matching
 * create call rather than caller-provided memory - confirm against the
 * allocation path. */
void opus_encoder_destroy(OpusEncoder *st)
{
    opus_free(st);
}
2537