1 /*
2 * AAC decoder
3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5 * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
6 *
7 * AAC LATM decoder
8 * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
9 * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
10 *
11 * This file is part of FFmpeg.
12 *
13 * FFmpeg is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
17 *
18 * FFmpeg is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with FFmpeg; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 */
27
28 /**
29 * @file
30 * AAC decoder
31 * @author Oded Shimon ( ods15 ods15 dyndns org )
32 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
33 */
34
35 #define FFT_FLOAT 1
36 #define FFT_FIXED_32 0
37 #define USE_FIXED 0
38
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/opt.h"
41 #include "avcodec.h"
42 #include "internal.h"
43 #include "get_bits.h"
44 #include "fft.h"
45 #include "mdct15.h"
46 #include "lpc.h"
47 #include "kbdwin.h"
48 #include "sinewin.h"
49
50 #include "aac.h"
51 #include "aactab.h"
52 #include "aacdectab.h"
53 #include "adts_header.h"
54 #include "cbrt_data.h"
55 #include "sbr.h"
56 #include "aacsbr.h"
57 #include "mpeg4audio.h"
58 #include "profiles.h"
59 #include "libavutil/intfloat.h"
60
61 #include <errno.h>
62 #include <math.h>
63 #include <stdint.h>
64 #include <string.h>
65
66 #if ARCH_ARM
67 # include "arm/aac.h"
68 #elif ARCH_MIPS
69 # include "mips/aacdec_mips.h"
70 #endif
71
72 DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_120))[120];
73 DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_960))[960];
74 DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_long_960))[960];
75 DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_short_120))[120];
76
reset_predict_state(PredictorState * ps)77 static av_always_inline void reset_predict_state(PredictorState *ps)
78 {
79 ps->r0 = 0.0f;
80 ps->r1 = 0.0f;
81 ps->cor0 = 0.0f;
82 ps->cor1 = 0.0f;
83 ps->var0 = 1.0f;
84 ps->var1 = 1.0f;
85 }
86
#ifndef VMUL2
/**
 * Look up two table entries and write them, scaled, to dst.
 *
 * @param dst   output; two floats are written
 * @param v     lookup table, indexed with 4-bit values
 * @param idx   packed indices: bits 0-3 select the first entry,
 *              bits 4-7 the second
 * @param scale pointer to the scale factor applied to both entries
 * @return pointer just past the last written float (dst + 2)
 */
static inline float *VMUL2(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;

    dst[0] = v[ idx       & 15] * gain;
    dst[1] = v[(idx >> 4) & 15] * gain;

    return dst + 2;
}
#endif
97
#ifndef VMUL4
/**
 * Look up four table entries and write them, scaled, to dst.
 *
 * @param dst   output; four floats are written
 * @param v     lookup table, indexed with 2-bit values
 * @param idx   packed indices: bits 0-1, 2-3, 4-5 and 6-7 select the
 *              first to fourth entry respectively
 * @param scale pointer to the scale factor applied to all entries
 * @return pointer just past the last written float (dst + 4)
 */
static inline float *VMUL4(float *dst, const float *v, unsigned idx,
                           const float *scale)
{
    const float gain = *scale;
    int n;

    for (n = 0; n < 4; n++)
        dst[n] = v[(idx >> (2 * n)) & 3] * gain;

    return dst + 4;
}
#endif
110
#ifndef VMUL2S
/**
 * Look up two table entries and write them to dst, scaled and with an
 * individually selectable sign.
 *
 * The sign is applied by flipping bit 31 of the scale factor's bit pattern
 * before multiplying.
 *
 * @param dst   output; two floats are written
 * @param v     lookup table, indexed with 4-bit values
 * @param idx   packed indices: bits 0-3 select the first entry,
 *              bits 4-7 the second
 * @param sign  bit 1 negates the first output, bit 0 negates the second
 * @param scale pointer to the scale factor
 * @return pointer just past the last written float (dst + 2)
 */
static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    uint32_t scale_bits, first_bits, second_bits;
    float first_scale, second_scale;

    memcpy(&scale_bits, scale, sizeof(scale_bits));

    /* Move the relevant sign flag into the float sign bit position. */
    first_bits  = scale_bits ^ ((sign >> 1) << 31);
    second_bits = scale_bits ^ ( sign       << 31);

    memcpy(&first_scale,  &first_bits,  sizeof(first_scale));
    memcpy(&second_scale, &second_bits, sizeof(second_scale));

    dst[0] = v[ idx       & 15] * first_scale;
    dst[1] = v[(idx >> 4) & 15] * second_scale;

    return dst + 2;
}
#endif
127
#ifndef VMUL4S
/**
 * Look up four table entries and write them to dst, scaled and signed.
 *
 * Sign flags are consumed from the top of @p sign: bit 31 is the sign of
 * the current output, and @p sign is shifted left by one after an output
 * only when that output's flag in bits 12 and up of @p idx is set.
 *
 * @param dst   output; four floats are written
 * @param v     lookup table, indexed with 2-bit values
 * @param idx   bits 0-7: four packed 2-bit indices; bits 12 and up: one
 *              flag per output telling whether it consumes a sign bit
 * @param sign  sign flags, most significant bit first
 * @param scale pointer to the scale factor
 * @return pointer just past the last written float (dst + 4)
 */
static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
                            unsigned sign, const float *scale)
{
    unsigned consume = idx >> 12;
    uint32_t scale_bits;
    int n;

    memcpy(&scale_bits, scale, sizeof(scale_bits));

    for (n = 0; n < 4; n++) {
        const uint32_t signed_bits = scale_bits ^ (sign & (1U << 31));
        float signed_scale;

        memcpy(&signed_scale, &signed_bits, sizeof(signed_scale));
        dst[n] = v[(idx >> (2 * n)) & 3] * signed_scale;

        /* Advance to the next sign flag only if this output used one. */
        sign    <<= consume & 1;
        consume >>= 1;
    }

    return dst + 4;
}
#endif
154
flt16_round(float pf)155 static av_always_inline float flt16_round(float pf)
156 {
157 union av_intfloat32 tmp;
158 tmp.f = pf;
159 tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
160 return tmp.f;
161 }
162
flt16_even(float pf)163 static av_always_inline float flt16_even(float pf)
164 {
165 union av_intfloat32 tmp;
166 tmp.f = pf;
167 tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
168 return tmp.f;
169 }
170
flt16_trunc(float pf)171 static av_always_inline float flt16_trunc(float pf)
172 {
173 union av_intfloat32 pun;
174 pun.f = pf;
175 pun.i &= 0xFFFF0000U;
176 return pun.f;
177 }
178
predict(PredictorState * ps,float * coef,int output_enable)179 static av_always_inline void predict(PredictorState *ps, float *coef,
180 int output_enable)
181 {
182 const float a = 0.953125; // 61.0 / 64
183 const float alpha = 0.90625; // 29.0 / 32
184 float e0, e1;
185 float pv;
186 float k1, k2;
187 float r0 = ps->r0, r1 = ps->r1;
188 float cor0 = ps->cor0, cor1 = ps->cor1;
189 float var0 = ps->var0, var1 = ps->var1;
190
191 k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
192 k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
193
194 pv = flt16_round(k1 * r0 + k2 * r1);
195 if (output_enable)
196 *coef += pv;
197
198 e0 = *coef;
199 e1 = e0 - k1 * r0;
200
201 ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
202 ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
203 ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
204 ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
205
206 ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
207 ps->r0 = flt16_trunc(a * e0);
208 }
209
210 /**
211 * Apply dependent channel coupling (applied before IMDCT).
212 *
213 * @param index index into coupling gain array
214 */
apply_dependent_coupling(AACContext * ac,SingleChannelElement * target,ChannelElement * cce,int index)215 static void apply_dependent_coupling(AACContext *ac,
216 SingleChannelElement *target,
217 ChannelElement *cce, int index)
218 {
219 IndividualChannelStream *ics = &cce->ch[0].ics;
220 const uint16_t *offsets = ics->swb_offset;
221 float *dest = target->coeffs;
222 const float *src = cce->ch[0].coeffs;
223 int g, i, group, k, idx = 0;
224 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
225 av_log(ac->avctx, AV_LOG_ERROR,
226 "Dependent coupling is not supported together with LTP\n");
227 return;
228 }
229 for (g = 0; g < ics->num_window_groups; g++) {
230 for (i = 0; i < ics->max_sfb; i++, idx++) {
231 if (cce->ch[0].band_type[idx] != ZERO_BT) {
232 const float gain = cce->coup.gain[index][idx];
233 for (group = 0; group < ics->group_len[g]; group++) {
234 for (k = offsets[i]; k < offsets[i + 1]; k++) {
235 // FIXME: SIMDify
236 dest[group * 128 + k] += gain * src[group * 128 + k];
237 }
238 }
239 }
240 }
241 dest += ics->group_len[g] * 128;
242 src += ics->group_len[g] * 128;
243 }
244 }
245
246 /**
247 * Apply independent channel coupling (applied after IMDCT).
248 *
249 * @param index index into coupling gain array
250 */
apply_independent_coupling(AACContext * ac,SingleChannelElement * target,ChannelElement * cce,int index)251 static void apply_independent_coupling(AACContext *ac,
252 SingleChannelElement *target,
253 ChannelElement *cce, int index)
254 {
255 const float gain = cce->coup.gain[index][0];
256 const float *src = cce->ch[0].ret;
257 float *dest = target->ret;
258 const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
259
260 ac->fdsp->vector_fmac_scalar(dest, src, gain, len);
261 }
262
263 #include "aacdec_template.c"
264
265 #define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
266
267 struct LATMContext {
268 AACContext aac_ctx; ///< containing AACContext
269 int initialized; ///< initialized after a valid extradata was seen
270
271 // parser data
272 int audio_mux_version_A; ///< LATM syntax version
273 int frame_length_type; ///< 0/1 variable/fixed frame length
274 int frame_length; ///< frame length for fixed frame length
275 };
276
/**
 * Read a variable-length LATM value: a 2-bit field gives the number of
 * bytes minus one, followed by that many bytes (8 to 32 bits) of value.
 *
 * @param b bit reader
 * @return the value read
 */
static inline uint32_t latm_get_value(GetBitContext *b)
{
    const int nb_bytes = get_bits(b, 2) + 1;

    return get_bits_long(b, nb_bytes * 8);
}
283
/**
 * Parse the AudioSpecificConfig found at the current position of gb and,
 * when the decoder is not initialized yet or the sample rate / channel
 * configuration differs from the active one, copy the raw config bytes
 * into avctx->extradata and clear latmctx->initialized.
 *
 * @param latmctx LATM decoder context
 * @param gb      bit reader positioned at the config; advanced past it on
 *                success
 * @param asclen  config length in bits; 0 means the length is not known in
 *                advance and is taken from the number of bits parsed
 * @return 0 on success, AVERROR_INVALIDDATA for a negative length, an
 *         exhausted reader or an unparsable config, AVERROR(ENOMEM) when
 *         the extradata buffer cannot be allocated
 */
static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
                                             GetBitContext *gb, int asclen)
{
    AACContext *ac        = &latmctx->aac_ctx;
    AVCodecContext *avctx = ac->avctx;
    MPEG4AudioConfig m4ac = { 0 };
    GetBitContext gbc;
    int config_start_bit  = get_bits_count(gb);
    int sync_extension    = 0;
    int bits_consumed, esize, i;

    /* Reject bad input before building a sub-reader from it. */
    if (asclen < 0 || get_bits_left(gb) <= 0)
        return AVERROR_INVALIDDATA;

    if (asclen > 0) {
        /* Explicit length: confine parsing to the announced window. */
        sync_extension = 1;
        asclen         = FFMIN(asclen, get_bits_left(gb));
        init_get_bits(&gbc, gb->buffer, config_start_bit + asclen);
        skip_bits_long(&gbc, config_start_bit);
    } else {
        gbc = *gb;
    }

    bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
                                                    &gbc, config_start_bit,
                                                    sync_extension);

    if (bits_consumed < config_start_bit)
        return AVERROR_INVALIDDATA;
    bits_consumed -= config_start_bit;

    if (asclen == 0)
        asclen = bits_consumed;

    if (!latmctx->initialized ||
        ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
        ac->oc[1].m4ac.chan_config != m4ac.chan_config) {

        if (latmctx->initialized) {
            av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
        } else {
            av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
        }
        latmctx->initialized = 0;

        esize = (asclen + 7) / 8;

        /*
         * Reallocate when the buffer is missing or too small. The NULL
         * test matters: relying on extradata_size alone would let the copy
         * below write through a NULL pointer.
         */
        if (!avctx->extradata || avctx->extradata_size < esize) {
            av_free(avctx->extradata);
            avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
            if (!avctx->extradata) {
                /* Keep size and pointer consistent after the failure. */
                avctx->extradata_size = 0;
                return AVERROR(ENOMEM);
            }
        }

        avctx->extradata_size = esize;
        gbc = *gb;
        for (i = 0; i < esize; i++) {
            avctx->extradata[i] = get_bits(&gbc, 8);
        }
        memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
    }
    skip_bits_long(gb, asclen);

    return 0;
}
351
/**
 * Parse a LATM StreamMuxConfig.
 *
 * Only a single program with a single layer is handled; anything else is
 * reported through avpriv_request_sample(). When audioMuxVersionA is
 * non-zero nothing beyond the version bits is read.
 *
 * @param latmctx LATM decoder context; audio_mux_version_A,
 *                frame_length_type and frame_length are updated
 * @param gb      bit reader positioned at the config
 * @return 0 on success, a negative AVERROR code on failure
 */
static int read_stream_mux_config(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    AVCodecContext *avctx       = latmctx->aac_ctx.avctx;
    const int audio_mux_version = get_bits(gb, 1);
    int asc_len = 0;
    int ret;

    latmctx->audio_mux_version_A = 0;
    if (audio_mux_version)
        latmctx->audio_mux_version_A = get_bits(gb, 1);

    if (latmctx->audio_mux_version_A)
        return 0;

    if (audio_mux_version)
        latm_get_value(gb);                 // taraFullness

    skip_bits(gb, 1);                       // allStreamSameTimeFraming
    skip_bits(gb, 6);                       // numSubFrames

    if (get_bits(gb, 4)) {                  // numPrograms
        avpriv_request_sample(avctx, "Multiple programs");
        return AVERROR_PATCHWELCOME;
    }

    // for each program (which there is only one in DVB)

    // for each layer (which there is only one in DVB)
    if (get_bits(gb, 3)) {                  // numLayer
        avpriv_request_sample(avctx, "Multiple layers");
        return AVERROR_PATCHWELCOME;
    }

    // for all but first stream: use_same_config = get_bits(gb, 1);

    /* The config length is only signalled when audioMuxVersion is set. */
    if (audio_mux_version)
        asc_len = latm_get_value(gb);
    ret = latm_decode_audio_specific_config(latmctx, gb, asc_len);
    if (ret < 0)
        return ret;

    latmctx->frame_length_type = get_bits(gb, 3);
    switch (latmctx->frame_length_type) {
    case 0:
        skip_bits(gb, 8);       // latmBufferFullness
        break;
    case 1:
        latmctx->frame_length = get_bits(gb, 9);
        break;
    case 3:
    case 4:
    case 5:
        skip_bits(gb, 6);       // CELP frame length table index
        break;
    case 6:
    case 7:
        skip_bits(gb, 1);       // HVXC frame length table index
        break;
    }

    if (get_bits(gb, 1)) {                  // other data
        if (audio_mux_version) {
            latm_get_value(gb);             // other_data_bits
        } else {
            /* Escape-coded length: a 1-bit flag before each 8-bit chunk. */
            for (;;) {
                int esc;

                if (get_bits_left(gb) < 9)
                    return AVERROR_INVALIDDATA;
                esc = get_bits(gb, 1);
                skip_bits(gb, 8);
                if (!esc)
                    break;
            }
        }
    }

    if (get_bits(gb, 1))                    // crc present
        skip_bits(gb, 8);                   // config_crc

    return 0;
}
431
/**
 * Read the payload length information for the current mux element.
 *
 * @param ctx LATM decoder context providing frame_length_type/frame_length
 * @param gb  bit reader
 * @return for frame length type 0, the sum of the length bytes read (reading
 *         stops after the first byte that is not 255); for type 1, the
 *         stored frame_length; 0 for every other type; AVERROR_INVALIDDATA
 *         if the reader runs out of data
 */
static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
{
    switch (ctx->frame_length_type) {
    case 0: {
        int total = 0;
        int chunk;

        do {
            if (get_bits_left(gb) < 8)
                return AVERROR_INVALIDDATA;
            chunk  = get_bits(gb, 8);
            total += chunk;
        } while (chunk == 255);

        return total;
    }
    case 1:
        /*
         * NOTE(review): ISO/IEC 14496-3 appears to define the fixed payload
         * size as 8 * (frameLength + 20) bits; confirm whether an offset is
         * missing here.
         */
        return ctx->frame_length;
    case 3:
    case 5:
    case 7:
        skip_bits(gb, 2);          // mux_slot_length_coded
        break;
    }

    return 0;
}
454
/**
 * Parse the header part of an AudioMuxElement: the optional stream mux
 * configuration followed by the payload length information.
 *
 * @param latmctx LATM decoder context
 * @param gb      bit reader positioned after the LOAS header
 * @return 0 on success, 1 when the element reuses a configuration but no
 *         extradata is available yet, a negative AVERROR code on failure
 */
static int read_audio_mux_element(struct LATMContext *latmctx,
                                  GetBitContext *gb)
{
    AVCodecContext *avctx = latmctx->aac_ctx.avctx;
    int ret, slot_bytes, bits_left;

    if (get_bits(gb, 1)) {
        /* Same mux config as before: only valid once a config was seen. */
        if (!avctx->extradata) {
            av_log(avctx, AV_LOG_DEBUG,
                   "no decoder config found\n");
            return 1;
        }
    } else {
        ret = read_stream_mux_config(latmctx, gb);
        if (ret < 0)
            return ret;
    }

    if (latmctx->audio_mux_version_A != 0)
        return 0;

    slot_bytes = read_payload_length_info(latmctx, gb);
    bits_left  = get_bits_left(gb);

    if (slot_bytes < 0 || slot_bytes * 8LL > bits_left) {
        av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
        return AVERROR_INVALIDDATA;
    }

    /* Tolerate up to 256 bits beyond the announced payload. */
    if (slot_bytes * 8 + 256 < bits_left) {
        av_log(avctx, AV_LOG_ERROR,
               "frame length mismatch %d << %d\n",
               slot_bytes * 8, bits_left);
        return AVERROR_INVALIDDATA;
    }

    return 0;
}
482
483
/**
 * Decode one LOAS/LATM packet.
 *
 * Checks the LOAS sync word and mux length, parses the audio mux element,
 * (re)configures the AAC decoder from extradata when it is not initialized
 * yet, and then hands the payload to the matching AAC frame decoder.
 *
 * @param avctx         codec context
 * @param out           output frame, passed through to the AAC decoder
 * @param got_frame_ptr set to 0 here when no configuration is available;
 *                      otherwise passed through to the AAC decoder
 * @param avpkt         input packet
 * @return the mux length from the LOAS header after decoding, avpkt->size
 *         when the packet is skipped for lack of a configuration, or a
 *         negative AVERROR code on failure
 */
static int latm_decode_frame(AVCodecContext *avctx, void *out,
                             int *got_frame_ptr, AVPacket *avpkt)
{
    struct LATMContext *latmctx = avctx->priv_data;
    AACContext *ac              = &latmctx->aac_ctx;
    GetBitContext gb;
    int muxlength, ret;

    ret = init_get_bits8(&gb, avpkt->data, avpkt->size);
    if (ret < 0)
        return ret;

    // check for LOAS sync word
    if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
        return AVERROR_INVALIDDATA;

    muxlength = get_bits(&gb, 13) + 3;
    // not enough data, the parser should have sorted this out
    if (muxlength > avpkt->size)
        return AVERROR_INVALIDDATA;

    ret = read_audio_mux_element(latmctx, &gb);
    if (ret < 0)
        return ret;
    if (ret > 0)
        return avpkt->size;

    if (!latmctx->initialized) {
        if (!avctx->extradata) {
            *got_frame_ptr = 0;
            return avpkt->size;
        }

        /* Configure from extradata; undo the pushed config on failure. */
        push_output_configuration(ac);
        ret = decode_audio_specific_config(ac, avctx, &ac->oc[1].m4ac,
                                           avctx->extradata,
                                           avctx->extradata_size*8LL, 1);
        if (ret < 0) {
            pop_output_configuration(ac);
            return ret;
        }
        latmctx->initialized = 1;
    }

    if (show_bits(&gb, 12) == 0xfff) {
        av_log(ac->avctx, AV_LOG_ERROR,
               "ADTS header detected, probably as result of configuration "
               "misparsing\n");
        return AVERROR_INVALIDDATA;
    }

    switch (ac->oc[1].m4ac.object_type) {
    case AOT_ER_AAC_LC:
    case AOT_ER_AAC_LTP:
    case AOT_ER_AAC_LD:
    case AOT_ER_AAC_ELD:
        ret = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
        break;
    default:
        ret = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
        break;
    }
    if (ret < 0)
        return ret;

    return muxlength;
}
544
latm_decode_init(AVCodecContext * avctx)545 static av_cold int latm_decode_init(AVCodecContext *avctx)
546 {
547 struct LATMContext *latmctx = avctx->priv_data;
548 int ret = aac_decode_init(avctx);
549
550 if (avctx->extradata_size > 0)
551 latmctx->initialized = !ret;
552
553 return ret;
554 }
555
556 AVCodec ff_aac_decoder = {
557 .name = "aac",
558 .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
559 .type = AVMEDIA_TYPE_AUDIO,
560 .id = AV_CODEC_ID_AAC,
561 .priv_data_size = sizeof(AACContext),
562 .init = aac_decode_init,
563 .close = aac_decode_close,
564 .decode = aac_decode_frame,
565 .sample_fmts = (const enum AVSampleFormat[]) {
566 AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
567 },
568 .capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
569 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
570 .channel_layouts = aac_channel_layout,
571 .flush = flush,
572 .priv_class = &aac_decoder_class,
573 .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
574 };
575
576 /*
577 Note: This decoder filter is intended to decode LATM streams transferred
578 in MPEG transport streams which only contain one program.
579 To do a more complex LATM demuxing a separate LATM demuxer should be used.
580 */
581 AVCodec ff_aac_latm_decoder = {
582 .name = "aac_latm",
583 .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
584 .type = AVMEDIA_TYPE_AUDIO,
585 .id = AV_CODEC_ID_AAC_LATM,
586 .priv_data_size = sizeof(struct LATMContext),
587 .init = latm_decode_init,
588 .close = aac_decode_close,
589 .decode = latm_decode_frame,
590 .sample_fmts = (const enum AVSampleFormat[]) {
591 AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
592 },
593 .capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
594 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
595 .channel_layouts = aac_channel_layout,
596 .flush = flush,
597 .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
598 };
599