1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/audio_coding/codecs/opus/opus_interface.h"
12 #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"
13
14 #include <assert.h>
15 #include <stdlib.h>
16 #include <string.h>
17
enum {
  /* Longest frame, in milliseconds, accepted on the encode side. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 60,

  /* Longest frame, in milliseconds, that the decode side has to cope with.
   * The Opus format permits frames of up to 120 ms and the remote encoder is
   * outside our control, even though NetEq is currently limited to 60 ms. */
  kWebRtcOpusMaxDecodeFrameSizeMs = 120,

  /* Upper bound on samples per channel in one frame: 48 samples per
   * millisecond (48 kHz) times the longest decodable frame. */
  kWebRtcOpusMaxFrameSizePerChannel = kWebRtcOpusMaxDecodeFrameSizeMs * 48,

  /* Default frame size in samples per channel: 20 ms at 48 kHz. */
  kWebRtcOpusDefaultFrameSize = 20 * 48,

  /* Number of consecutive zero samples at, or beyond, which DTX can fail. */
  kZeroBreakCount = 157,

  /* Sample value written into the input to interrupt such a run of zeros. */
#ifdef OPUS_FIXED_POINT
  kZeroBreakValue = 10,
#else
  kZeroBreakValue = 1,
#endif
};
43
/* Creates an Opus encoder instance running at 48 kHz.
 *
 * inst        - receives the new instance on success; must not be NULL.
 * channels    - number of audio channels, forwarded to opus_encoder_create().
 * application - 0 selects OPUS_APPLICATION_VOIP, 1 selects
 *               OPUS_APPLICATION_AUDIO; any other value is rejected.
 *
 * Returns 0 on success and -1 on failure. On failure |*inst| is left untouched
 * and everything allocated here is released again. A successfully created
 * instance is released with WebRtcOpus_EncoderFree(). */
int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
                                 size_t channels,
                                 int32_t application) {
  int opus_app;
  if (!inst)
    return -1;

  switch (application) {
    case 0:
      opus_app = OPUS_APPLICATION_VOIP;
      break;
    case 1:
      opus_app = OPUS_APPLICATION_AUDIO;
      break;
    default:
      return -1;
  }

  /* Allocation failures are reported to the caller instead of being asserted:
   * assert() compiles away under NDEBUG and would leave a NULL dereference. */
  OpusEncInst* state = calloc(1, sizeof(*state));
  if (!state)
    return -1;

  /* One zero-run counter per channel, consumed by WebRtcOpus_Encode(). */
  state->zero_counts = calloc(channels, sizeof(size_t));
  if (!state->zero_counts) {
    free(state);
    return -1;
  }

  int error = OPUS_OK;
  state->encoder = opus_encoder_create(48000, (int)channels, opus_app,
                                       &error);
  if (error != OPUS_OK || !state->encoder) {
    /* Releases the counters and the state (and the encoder, if any). */
    WebRtcOpus_EncoderFree(state);
    return -1;
  }

  state->in_dtx_mode = 0;
  state->channels = channels;

  *inst = state;
  return 0;
}
83
/* Releases an encoder instance: the Opus encoder, the per-channel zero
 * counters and the instance itself.
 *
 * Returns 0 on success, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  opus_encoder_destroy(inst->encoder);
  free(inst->zero_counts);
  free(inst);
  return 0;
}
94
/* Encodes one frame of interleaved 16-bit audio with Opus.
 *
 * inst                  - encoder instance; must not be NULL.
 * audio_in              - |samples| samples per channel, interleaved.
 * samples               - samples per channel; at most 60 ms at 48 kHz.
 * length_encoded_buffer - capacity of |encoded| in bytes.
 * encoded               - receives the encoded packet.
 *
 * Returns
 *   > 1 : size in bytes of a regular packet,
 *     1 : the first DTX packet (header only); it should still be sent so the
 *         decoder learns that the encoder entered DTX,
 *     0 : a subsequent DTX packet that does not need to be sent,
 *    -1 : error (NULL instance, too many samples, or encoder failure). */
int WebRtcOpus_Encode(OpusEncInst* inst,
                      const int16_t* audio_in,
                      size_t samples,
                      size_t length_encoded_buffer,
                      uint8_t* encoded) {
  int res;
  size_t i;
  size_t c;

  /* Scratch copy of the input, used only when a sample has to be patched.
   * Sized for two channels of the longest supported encode frame.
   * NOTE(review): relies on inst->channels <= 2 - confirm that
   * opus_encoder_create() rejects anything else. */
  int16_t buffer[2 * 48 * kWebRtcOpusMaxEncodeFrameSizeMs];

  /* Consistent with the other encoder entry points: fail on a NULL instance
   * instead of dereferencing it. */
  if (!inst) {
    return -1;
  }

  if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
    return -1;
  }

  const size_t channels = inst->channels;
  int use_buffer = 0;

  /* While in DTX mode, break long runs of zeros: every |kZeroBreakCount|-th
   * consecutive zero of a channel is replaced by |kZeroBreakValue|. The
   * caller's buffer is never modified; the first patch triggers a copy. */
  if (inst->in_dtx_mode) {
    for (i = 0; i < samples; ++i) {
      for (c = 0; c < channels; ++c) {
        if (audio_in[i * channels + c] == 0) {
          ++inst->zero_counts[c];
          if (inst->zero_counts[c] == kZeroBreakCount) {
            if (!use_buffer) {
              memcpy(buffer, audio_in, samples * channels * sizeof(int16_t));
              use_buffer = 1;
            }
            buffer[i * channels + c] = kZeroBreakValue;
            inst->zero_counts[c] = 0;
          }
        } else {
          inst->zero_counts[c] = 0;
        }
      }
    }
  }

  res = opus_encode(inst->encoder,
                    use_buffer ? buffer : audio_in,
                    (int)samples,
                    encoded,
                    (opus_int32)length_encoded_buffer);

  if (res == 1) {
    /* Indicates DTX since the packet has nothing but a header. In principle,
     * there is no need to send this packet. However, we do transmit the first
     * occurrence to let the decoder know that the encoder enters DTX mode. */
    if (inst->in_dtx_mode) {
      return 0;
    } else {
      inst->in_dtx_mode = 1;
      return 1;
    }
  } else if (res > 1) {
    inst->in_dtx_mode = 0;
    return res;
  }

  /* Zero or a negative Opus error code. */
  return -1;
}
158
/* Forwards |rate| to the encoder through OPUS_SET_BITRATE.
 *
 * Returns -1 when |inst| is NULL, otherwise the result of
 * opus_encoder_ctl(). */
int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
}
166
/* Forwards |loss_rate| to the encoder through OPUS_SET_PACKET_LOSS_PERC.
 *
 * Returns -1 when |inst| is NULL, otherwise the result of
 * opus_encoder_ctl(). */
int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder,
                          OPUS_SET_PACKET_LOSS_PERC(loss_rate));
}
175
/* Limits the encoder's maximum bandwidth according to the receiver's maximum
 * playback rate |frequency_hz|:
 *   <=  8000 Hz -> narrowband
 *   <= 12000 Hz -> mediumband
 *   <= 16000 Hz -> wideband
 *   <= 24000 Hz -> super-wideband
 *   otherwise   -> fullband
 *
 * Returns -1 when |inst| is NULL, otherwise the result of
 * opus_encoder_ctl(). */
int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
  if (!inst) {
    return -1;
  }

  /* Fullband unless one of the lower thresholds applies. */
  opus_int32 max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
  if (frequency_hz <= 8000) {
    max_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
  } else if (frequency_hz <= 12000) {
    max_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
  } else if (frequency_hz <= 16000) {
    max_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
  } else if (frequency_hz <= 24000) {
    max_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
  }

  return opus_encoder_ctl(inst->encoder,
                          OPUS_SET_MAX_BANDWIDTH(max_bandwidth));
}
196
/* Turns in-band FEC on via OPUS_SET_INBAND_FEC(1).
 *
 * Returns -1 when |inst| is NULL, otherwise the result of
 * opus_encoder_ctl(). */
int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
}
204
/* Turns in-band FEC off via OPUS_SET_INBAND_FEC(0).
 *
 * Returns -1 when |inst| is NULL, otherwise the result of
 * opus_encoder_ctl(). */
int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
}
212
/* Enables DTX on the encoder.
 *
 * The signal type is first forced to voice so that Opus does not enter
 * CELT-only mode; without that, DTX currently does not last long during pure
 * silence.
 * TODO(minyue): Remove the signal type forcing when Opus DTX works properly
 * without it.
 *
 * Returns -1 when |inst| is NULL, the error from setting the signal type if
 * that step fails, otherwise the result of OPUS_SET_DTX(1). */
int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  const int signal_status =
      opus_encoder_ctl(inst->encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
  if (signal_status != OPUS_OK) {
    return signal_status;
  }

  return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
}
230
/* Disables DTX on the encoder and hands the signal type selection back to
 * Opus (OPUS_AUTO).
 *
 * Returns -1 when |inst| is NULL, the error from resetting the signal type if
 * that step fails, otherwise the result of OPUS_SET_DTX(0). */
int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  const int signal_status =
      opus_encoder_ctl(inst->encoder, OPUS_SET_SIGNAL(OPUS_AUTO));
  if (signal_status != OPUS_OK) {
    return signal_status;
  }

  return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0));
}
242
/* Forwards |complexity| to the encoder through OPUS_SET_COMPLEXITY.
 *
 * Returns -1 when |inst| is NULL, otherwise the result of
 * opus_encoder_ctl(). */
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
}
250
/* Creates an Opus decoder instance; the decoder always runs at 48 kHz.
 *
 * inst     - receives the new instance on success; must not be NULL.
 * channels - number of channels, forwarded to opus_decoder_create().
 *
 * Returns 0 on success, -1 on failure (NULL |inst|, allocation failure or
 * Opus decoder creation failure). */
int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) {
  if (inst == NULL) {
    return -1;
  }

  /* Zero-initialized wrapper state. */
  OpusDecInst* state = calloc(1, sizeof(OpusDecInst));
  if (state == NULL) {
    return -1;
  }

  int error;
  state->decoder = opus_decoder_create(48000, (int)channels, &error);
  if (error != OPUS_OK || state->decoder == NULL) {
    /* Creation failed: release whatever was allocated. */
    if (state->decoder) {
      opus_decoder_destroy(state->decoder);
    }
    free(state);
    return -1;
  }

  state->channels = channels;
  state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
  state->in_dtx_mode = 0;
  *inst = state;
  return 0;
}
281
/* Releases a decoder instance: the Opus decoder and the instance itself.
 *
 * Returns 0 on success, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
  if (!inst) {
    return -1;
  }

  opus_decoder_destroy(inst->decoder);
  free(inst);
  return 0;
}
291
/* Returns the channel count stored in the decoder instance. |inst| is
 * dereferenced unconditionally and therefore must not be NULL. */
size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
  const size_t channel_count = inst->channels;
  return channel_count;
}
295
/* Re-initializes a decoder instance: clears the wrapper's DTX flag and resets
 * the Opus decoder state. |inst| must not be NULL. */
void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
  inst->in_dtx_mode = 0;
  opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
}
300
301 /* For decoder to determine if it is to output speech or comfort noise. */
DetermineAudioType(OpusDecInst * inst,size_t encoded_bytes)302 static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) {
303 // Audio type becomes comfort noise if |encoded_byte| is 1 and keeps
304 // to be so if the following |encoded_byte| are 0 or 1.
305 if (encoded_bytes == 0 && inst->in_dtx_mode) {
306 return 2; // Comfort noise.
307 } else if (encoded_bytes == 1) {
308 inst->in_dtx_mode = 1;
309 return 2; // Comfort noise.
310 } else {
311 inst->in_dtx_mode = 0;
312 return 0; // Speech.
313 }
314 }
315
316 /* |frame_size| is set to maximum Opus frame size in the normal case, and
317 * is set to the number of samples needed for PLC in case of losses.
318 * It is up to the caller to make sure the value is correct. */
DecodeNative(OpusDecInst * inst,const uint8_t * encoded,size_t encoded_bytes,int frame_size,int16_t * decoded,int16_t * audio_type,int decode_fec)319 static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded,
320 size_t encoded_bytes, int frame_size,
321 int16_t* decoded, int16_t* audio_type, int decode_fec) {
322 int res = opus_decode(inst->decoder, encoded, (opus_int32)encoded_bytes,
323 (opus_int16*)decoded, frame_size, decode_fec);
324
325 if (res <= 0)
326 return -1;
327
328 *audio_type = DetermineAudioType(inst, encoded_bytes);
329
330 return res;
331 }
332
/* Decodes one Opus packet.
 *
 * An empty packet (|encoded_bytes| == 0) is treated as a loss: the audio type
 * is determined from the DTX state and one frame of PLC output is produced.
 * Otherwise the packet is decoded with room for the maximum frame size.
 *
 * On success, remembers the decoded sample count for later PLC use and
 * returns it. Returns -1 on failure. */
int WebRtcOpus_Decode(OpusDecInst* inst, const uint8_t* encoded,
                      size_t encoded_bytes, int16_t* decoded,
                      int16_t* audio_type) {
  int sample_count;

  if (encoded_bytes != 0) {
    sample_count = DecodeNative(inst, encoded, encoded_bytes,
                                kWebRtcOpusMaxFrameSizePerChannel, decoded,
                                audio_type, 0);
  } else {
    *audio_type = DetermineAudioType(inst, encoded_bytes);
    sample_count = WebRtcOpus_DecodePlc(inst, decoded, 1);
  }

  if (sample_count < 0) {
    return -1;
  }

  /* Remembered so that PLC knows how much audio a lost frame represents. */
  inst->prev_decoded_samples = sample_count;
  return sample_count;
}
359
/* Generates packet-loss-concealment audio for |number_of_lost_frames| frames.
 *
 * The number of samples requested is |number_of_lost_frames| times the sample
 * count of the previously decoded frame, limited to
 * |kWebRtcOpusMaxFrameSizePerChannel|.
 *
 * Returns the number of samples produced, or -1 on failure. */
int WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
                         int number_of_lost_frames) {
  int16_t audio_type = 0; /* Required by DecodeNative(); value is unused. */

  /* The product is formed in 64 bits so that a large |number_of_lost_frames|
   * cannot overflow |int| (undefined behavior) before the clamp is applied. */
  const int64_t requested_samples =
      (int64_t)number_of_lost_frames * (int64_t)inst->prev_decoded_samples;

  /* Nothing to conceal. NOTE(review): opus_decode() is expected to reject a
   * non-positive frame size, so the previous outcome for this case was -1 as
   * well - confirm against the libopus version in use. */
  if (requested_samples <= 0) {
    return -1;
  }

  const int plc_samples =
      (requested_samples <= kWebRtcOpusMaxFrameSizePerChannel)
          ? (int)requested_samples
          : kWebRtcOpusMaxFrameSizePerChannel;

  const int decoded_samples =
      DecodeNative(inst, NULL, 0, plc_samples, decoded, &audio_type, 0);
  if (decoded_samples < 0) {
    return -1;
  }

  return decoded_samples;
}
380
/* Decodes the FEC (redundant) data carried in an Opus packet.
 *
 * Returns 0 when WebRtcOpus_PacketHasFec() does not report FEC for the
 * packet, -1 when decoding fails, otherwise the number of decoded samples.
 * The requested frame size is the packet's samples-per-frame at 48 kHz. */
int WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
                         size_t encoded_bytes, int16_t* decoded,
                         int16_t* audio_type) {
  if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
    return 0;
  }

  const int fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);
  const int sample_count = DecodeNative(inst, encoded, encoded_bytes,
                                        fec_samples, decoded, audio_type, 1);

  return (sample_count < 0) ? -1 : sample_count;
}
401
/* Estimates the duration of a packet in samples per channel at 48 kHz.
 *
 * An empty payload yields the PLC duration, because WebRtcOpus_Decode() runs
 * PLC for zero-length payloads. Returns 0 when the payload cannot be parsed
 * or when the resulting duration is outside [120, 5760] samples. */
int WebRtcOpus_DurationEst(OpusDecInst* inst,
                           const uint8_t* payload,
                           size_t payload_length_bytes) {
  if (payload_length_bytes == 0) {
    return WebRtcOpus_PlcDuration(inst);
  }

  const int frame_count =
      opus_packet_get_nb_frames(payload, (opus_int32)payload_length_bytes);
  if (frame_count < 0) {
    /* Invalid payload data. */
    return 0;
  }

  const int total_samples =
      frame_count * opus_packet_get_samples_per_frame(payload, 48000);
  const int duration_is_valid = (total_samples >= 120) &&
                                (total_samples <= 5760);

  /* 0 signals an invalid payload duration. */
  return duration_is_valid ? total_samples : 0;
}
424
/* Returns the number of samples PLC would produce for a single lost frame:
 * the sample count of the previously decoded frame, limited to
 * |kWebRtcOpusMaxFrameSizePerChannel|. */
int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
  const int previous_samples = inst->prev_decoded_samples;

  if (previous_samples > kWebRtcOpusMaxFrameSizePerChannel) {
    return kWebRtcOpusMaxFrameSizePerChannel;
  }
  return previous_samples;
}
433
/* Estimates the duration of the FEC data in a packet, in samples per channel
 * at 48 kHz.
 *
 * Returns 0 when WebRtcOpus_PacketHasFec() does not report FEC for the
 * packet, or when the samples-per-frame value is outside [480, 5760]. */
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
                              size_t payload_length_bytes) {
  if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
    return 0;
  }

  const int frame_samples = opus_packet_get_samples_per_frame(payload, 48000);
  const int duration_is_valid = (frame_samples >= 480) &&
                                (frame_samples <= 5760);

  /* 0 signals an invalid payload duration. */
  return duration_is_valid ? frame_samples : 0;
}
448
WebRtcOpus_PacketHasFec(const uint8_t * payload,size_t payload_length_bytes)449 int WebRtcOpus_PacketHasFec(const uint8_t* payload,
450 size_t payload_length_bytes) {
451 int frames, channels, payload_length_ms;
452 int n;
453 opus_int16 frame_sizes[48];
454 const unsigned char *frame_data[48];
455
456 if (payload == NULL || payload_length_bytes == 0)
457 return 0;
458
459 /* In CELT_ONLY mode, packets should not have FEC. */
460 if (payload[0] & 0x80)
461 return 0;
462
463 payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
464 if (10 > payload_length_ms)
465 payload_length_ms = 10;
466
467 channels = opus_packet_get_nb_channels(payload);
468
469 switch (payload_length_ms) {
470 case 10:
471 case 20: {
472 frames = 1;
473 break;
474 }
475 case 40: {
476 frames = 2;
477 break;
478 }
479 case 60: {
480 frames = 3;
481 break;
482 }
483 default: {
484 return 0; // It is actually even an invalid packet.
485 }
486 }
487
488 /* The following is to parse the LBRR flags. */
489 if (opus_packet_parse(payload, (opus_int32)payload_length_bytes, NULL,
490 frame_data, frame_sizes, NULL) < 0) {
491 return 0;
492 }
493
494 if (frame_sizes[0] <= 1) {
495 return 0;
496 }
497
498 for (n = 0; n < channels; n++) {
499 if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))
500 return 1;
501 }
502
503 return 0;
504 }
505