• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * AAC decoder
3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
5  * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
6  *
7  * AAC LATM decoder
8  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
9  * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
10  *
11  * AAC decoder fixed-point implementation
12  * Copyright (c) 2013
13  *      MIPS Technologies, Inc., California.
14  *
15  * This file is part of FFmpeg.
16  *
17  * FFmpeg is free software; you can redistribute it and/or
18  * modify it under the terms of the GNU Lesser General Public
19  * License as published by the Free Software Foundation; either
20  * version 2.1 of the License, or (at your option) any later version.
21  *
22  * FFmpeg is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * Lesser General Public License for more details.
26  *
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with FFmpeg; if not, write to the Free Software
29  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30  */
31 
32 /**
33  * @file
34  * AAC decoder
35  * @author Oded Shimon  ( ods15 ods15 dyndns org )
36  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
37  *
38  * AAC decoder fixed-point implementation
39  * @author Stanislav Ocovaj ( stanislav.ocovaj imgtec com )
40  * @author Nedeljko Babic ( nedeljko.babic imgtec com )
41  */
42 
43 /*
44  * supported tools
45  *
46  * Support?                     Name
47  * N (code in SoC repo)         gain control
48  * Y                            block switching
49  * Y                            window shapes - standard
50  * N                            window shapes - Low Delay
51  * Y                            filterbank - standard
52  * N (code in SoC repo)         filterbank - Scalable Sample Rate
53  * Y                            Temporal Noise Shaping
54  * Y                            Long Term Prediction
55  * Y                            intensity stereo
56  * Y                            channel coupling
57  * Y                            frequency domain prediction
58  * Y                            Perceptual Noise Substitution
59  * Y                            Mid/Side stereo
60  * N                            Scalable Inverse AAC Quantization
61  * N                            Frequency Selective Switch
62  * N                            upsampling filter
63  * Y                            quantization & coding - AAC
64  * N                            quantization & coding - TwinVQ
65  * N                            quantization & coding - BSAC
66  * N                            AAC Error Resilience tools
67  * N                            Error Resilience payload syntax
68  * N                            Error Protection tool
69  * N                            CELP
70  * N                            Silence Compression
71  * N                            HVXC
72  * N                            HVXC 4kbits/s VR
73  * N                            Structured Audio tools
74  * N                            Structured Audio Sample Bank Format
75  * N                            MIDI
76  * N                            Harmonic and Individual Lines plus Noise
77  * N                            Text-To-Speech Interface
78  * Y                            Spectral Band Replication
79  * Y (not in this code)         Layer-1
80  * Y (not in this code)         Layer-2
81  * Y (not in this code)         Layer-3
82  * N                            SinuSoidal Coding (Transient, Sinusoid, Noise)
83  * Y                            Parametric Stereo
84  * N                            Direct Stream Transfer
85  * Y  (not in fixed point code) Enhanced AAC Low Delay (ER AAC ELD)
86  *
87  * Note: - HE AAC v1 comprises LC AAC with Spectral Band Replication.
88  *       - HE AAC v2 comprises LC AAC with Spectral Band Replication and
89            Parametric Stereo.
90  */
91 
92 #include "libavutil/channel_layout.h"
93 #include "libavutil/thread.h"
94 #include "internal.h"
95 
/* File-scope VLC tables; going by the names, one for scalefactors and
 * eleven for spectral data (their initialisation is not in this part of
 * the file). */
static VLC vlc_scalefactors;
static VLC vlc_spectral[11];

/* Forward declaration: pop_output_configuration() calls output_configure()
 * before the latter is defined further down. */
static int output_configure(AACContext *ac,
                            uint8_t layout_map[MAX_ELEM_ID*4][3], int tags,
                            enum OCStatus oc_type, int get_new_frame);

/* Shared log message text; presumably emitted when parsing runs out of input
 * before an END element is seen (the user is outside this part of the file). */
#define overread_err "Input buffer exhausted before END element found\n"
104 
count_channels(uint8_t (* layout)[3],int tags)105 static int count_channels(uint8_t (*layout)[3], int tags)
106 {
107     int i, sum = 0;
108     for (i = 0; i < tags; i++) {
109         int syn_ele = layout[i][0];
110         int pos     = layout[i][2];
111         sum += (1 + (syn_ele == TYPE_CPE)) *
112                (pos != AAC_CHANNEL_OFF && pos != AAC_CHANNEL_CC);
113     }
114     return sum;
115 }
116 
117 /**
118  * Check for the channel element in the current channel position configuration.
119  * If it exists, make sure the appropriate element is allocated and map the
120  * channel order to match the internal FFmpeg channel layout.
121  *
122  * @param   che_pos current channel position configuration
123  * @param   type channel element type
124  * @param   id channel element id
125  * @param   channels count of the number of channels in the configuration
126  *
127  * @return  Returns error status. 0 - OK, !0 - error
128  */
che_configure(AACContext * ac,enum ChannelPosition che_pos,int type,int id,int * channels)129 static av_cold int che_configure(AACContext *ac,
130                                  enum ChannelPosition che_pos,
131                                  int type, int id, int *channels)
132 {
133     if (*channels >= MAX_CHANNELS)
134         return AVERROR_INVALIDDATA;
135     if (che_pos) {
136         if (!ac->che[type][id]) {
137             if (!(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
138                 return AVERROR(ENOMEM);
139             AAC_RENAME(ff_aac_sbr_ctx_init)(ac, &ac->che[type][id]->sbr, type);
140         }
141         if (type != TYPE_CCE) {
142             if (*channels >= MAX_CHANNELS - (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))) {
143                 av_log(ac->avctx, AV_LOG_ERROR, "Too many channels\n");
144                 return AVERROR_INVALIDDATA;
145             }
146             ac->output_element[(*channels)++] = &ac->che[type][id]->ch[0];
147             if (type == TYPE_CPE ||
148                 (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1)) {
149                 ac->output_element[(*channels)++] = &ac->che[type][id]->ch[1];
150             }
151         }
152     } else {
153         if (ac->che[type][id])
154             AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][id]->sbr);
155         av_freep(&ac->che[type][id]);
156     }
157     return 0;
158 }
159 
/**
 * Prepare the output frame and wire every channel to its destination buffer.
 *
 * All allocated channel elements are first pointed at their internal
 * ret_buf. A fresh 2048-sample buffer is then requested for ac->frame and
 * each mapped output element is redirected to the matching plane of it.
 *
 * @return  0 on success, 1 when the codec context has no channels (no buffer
 *          is requested), or the negative error returned by ff_get_buffer().
 */
static int frame_configure_elements(AVCodecContext *avctx)
{
    AACContext *ac = avctx->priv_data;
    int ret;

    /* Default: every channel writes into its own internal buffer. */
    for (int type = 0; type < 4; type++) {
        for (int id = 0; id < MAX_ELEM_ID; id++) {
            ChannelElement *che = ac->che[type][id];

            if (!che)
                continue;
            for (int k = 0; k < 2; k++)
                che->ch[k].ret = che->ch[k].ret_buf;
        }
    }

    /* Obtain the output buffer. */
    av_frame_unref(ac->frame);
    if (!avctx->ch_layout.nb_channels)
        return 1;

    ac->frame->nb_samples = 2048;
    ret = ff_get_buffer(avctx, ac->frame, 0);
    if (ret < 0)
        return ret;

    /* Redirect mapped output channels to the AVFrame planes. */
    for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
        if (!ac->output_element[ch])
            continue;
        ac->output_element[ch]->ret = (INTFLOAT *)ac->frame->extended_data[ch];
    }

    return 0;
}
193 
/**
 * Scratch record used while sniffing the channel order: ties one entry of
 * the layout map to the output channel position(s) chosen for it.
 */
struct elem_to_channel {
    uint64_t av_position;  /* AV_CH_* mask assigned to the element; UINT64_MAX when no position is assigned */
    uint8_t syn_ele;       /* syntactic element type (TYPE_SCE, TYPE_CPE, TYPE_LFE, ...) */
    uint8_t elem_id;       /* element id, copied from layout_map[..][1] */
    uint8_t aac_position;  /* AAC_CHANNEL_* position written back to layout_map[..][2] */
};
200 
assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],uint8_t (* layout_map)[3],int offset,uint64_t left,uint64_t right,int pos,uint64_t * layout)201 static int assign_pair(struct elem_to_channel e2c_vec[MAX_ELEM_ID],
202                        uint8_t (*layout_map)[3], int offset, uint64_t left,
203                        uint64_t right, int pos, uint64_t *layout)
204 {
205     if (layout_map[offset][0] == TYPE_CPE) {
206         e2c_vec[offset] = (struct elem_to_channel) {
207             .av_position  = left | right,
208             .syn_ele      = TYPE_CPE,
209             .elem_id      = layout_map[offset][1],
210             .aac_position = pos
211         };
212         if (e2c_vec[offset].av_position != UINT64_MAX)
213             *layout |= e2c_vec[offset].av_position;
214 
215         return 1;
216     } else {
217         e2c_vec[offset] = (struct elem_to_channel) {
218             .av_position  = left,
219             .syn_ele      = TYPE_SCE,
220             .elem_id      = layout_map[offset][1],
221             .aac_position = pos
222         };
223         e2c_vec[offset + 1] = (struct elem_to_channel) {
224             .av_position  = right,
225             .syn_ele      = TYPE_SCE,
226             .elem_id      = layout_map[offset + 1][1],
227             .aac_position = pos
228         };
229         if (left != UINT64_MAX)
230             *layout |= left;
231 
232         if (right != UINT64_MAX)
233             *layout |= right;
234 
235         return 2;
236     }
237 }
238 
count_paired_channels(uint8_t (* layout_map)[3],int tags,int pos,int * current)239 static int count_paired_channels(uint8_t (*layout_map)[3], int tags, int pos,
240                                  int *current)
241 {
242     int num_pos_channels = 0;
243     int first_cpe        = 0;
244     int sce_parity       = 0;
245     int i;
246     for (i = *current; i < tags; i++) {
247         if (layout_map[i][2] != pos)
248             break;
249         if (layout_map[i][0] == TYPE_CPE) {
250             if (sce_parity) {
251                 if (pos == AAC_CHANNEL_FRONT && !first_cpe) {
252                     sce_parity = 0;
253                 } else {
254                     return -1;
255                 }
256             }
257             num_pos_channels += 2;
258             first_cpe         = 1;
259         } else {
260             num_pos_channels++;
261             sce_parity ^= 1;
262         }
263     }
264     if (sce_parity &&
265         ((pos == AAC_CHANNEL_FRONT && first_cpe) || pos == AAC_CHANNEL_SIDE))
266         return -1;
267     *current = i;
268     return num_pos_channels;
269 }
270 
/* Channel mask accumulated by the generic front/side/back/LFE pass below for
 * a 22.2 stream, before the height/bottom channels are added. */
#define PREFIX_FOR_22POINT2 (AV_CH_LAYOUT_7POINT1_WIDE_BACK|AV_CH_BACK_CENTER|AV_CH_SIDE_LEFT|AV_CH_SIDE_RIGHT|AV_CH_LOW_FREQUENCY_2)
/**
 * Try to derive a native channel mask from a layout map.
 *
 * The leading entries of layout_map (front, side, back, LFE, plus the extra
 * 22.2 elements when that layout is recognised) are assigned AV_CH_*
 * positions and then rewritten in place in the resulting output order.
 *
 * @param   layout_map layout map to interpret; reordered in place
 * @param   tags       number of entries in layout_map
 *
 * @return  The accumulated AV_CH_* mask, or 0 when tags exceeds the scratch
 *          table or the front/side/back pairing is invalid.
 */
static uint64_t sniff_channel_order(uint8_t (*layout_map)[3], int tags)
{
    int i, n, total_non_cc_elements;
    struct elem_to_channel e2c_vec[4 * MAX_ELEM_ID] = { { 0 } };
    int num_front_channels, num_side_channels, num_back_channels;
    uint64_t layout = 0;

    if (FF_ARRAY_ELEMS(e2c_vec) < tags)
        return 0;

    /* First pass: count channels per position; i walks through the map. */
    i = 0;
    num_front_channels =
        count_paired_channels(layout_map, tags, AAC_CHANNEL_FRONT, &i);
    if (num_front_channels < 0)
        return 0;
    num_side_channels =
        count_paired_channels(layout_map, tags, AAC_CHANNEL_SIDE, &i);
    if (num_side_channels < 0)
        return 0;
    num_back_channels =
        count_paired_channels(layout_map, tags, AAC_CHANNEL_BACK, &i);
    if (num_back_channels < 0)
        return 0;

    /* No side channels but at least two back pairs: treat the first back
     * pair as the side pair. */
    if (num_side_channels == 0 && num_back_channels >= 4) {
        num_side_channels = 2;
        num_back_channels -= 2;
    }

    /* Second pass: assign positions, again starting from the first entry. */
    i = 0;
    /* An odd front count means the first entry is taken as the centre SCE. */
    if (num_front_channels & 1) {
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position  = AV_CH_FRONT_CENTER,
            .syn_ele      = TYPE_SCE,
            .elem_id      = layout_map[i][1],
            .aac_position = AAC_CHANNEL_FRONT
        };
        layout |= e2c_vec[i].av_position;
        i++;
        num_front_channels--;
    }
    if (num_front_channels >= 4) {
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_FRONT_LEFT_OF_CENTER,
                         AV_CH_FRONT_RIGHT_OF_CENTER,
                         AAC_CHANNEL_FRONT, &layout);
        num_front_channels -= 2;
    }
    if (num_front_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_FRONT_LEFT,
                         AV_CH_FRONT_RIGHT,
                         AAC_CHANNEL_FRONT, &layout);
        num_front_channels -= 2;
    }
    /* Any further front pairs get no known position (UINT64_MAX). */
    while (num_front_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         UINT64_MAX,
                         UINT64_MAX,
                         AAC_CHANNEL_FRONT, &layout);
        num_front_channels -= 2;
    }

    /* NOTE(review): this first side pair is recorded with AAC_CHANNEL_FRONT,
     * unlike the loop right below which uses AAC_CHANNEL_SIDE -- confirm
     * whether that is intentional before changing it. */
    if (num_side_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_SIDE_LEFT,
                         AV_CH_SIDE_RIGHT,
                         AAC_CHANNEL_FRONT, &layout);
        num_side_channels -= 2;
    }
    while (num_side_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         UINT64_MAX,
                         UINT64_MAX,
                         AAC_CHANNEL_SIDE, &layout);
        num_side_channels -= 2;
    }

    /* Surplus back pairs come first and get no known position; the last
     * pair becomes back left/right and a remaining single is back centre. */
    while (num_back_channels >= 4) {
        i += assign_pair(e2c_vec, layout_map, i,
                         UINT64_MAX,
                         UINT64_MAX,
                         AAC_CHANNEL_BACK, &layout);
        num_back_channels -= 2;
    }
    if (num_back_channels >= 2) {
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_BACK_LEFT,
                         AV_CH_BACK_RIGHT,
                         AAC_CHANNEL_BACK, &layout);
        num_back_channels -= 2;
    }
    if (num_back_channels) {
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position  = AV_CH_BACK_CENTER,
            .syn_ele      = TYPE_SCE,
            .elem_id      = layout_map[i][1],
            .aac_position = AAC_CHANNEL_BACK
        };
        layout |= e2c_vec[i].av_position;
        i++;
        num_back_channels--;
    }

    /* First LFE, second LFE, then any further LFEs without a position. */
    if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position  = AV_CH_LOW_FREQUENCY,
            .syn_ele      = TYPE_LFE,
            .elem_id      = layout_map[i][1],
            .aac_position = AAC_CHANNEL_LFE
        };
        layout |= e2c_vec[i].av_position;
        i++;
    }
    if (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position  = AV_CH_LOW_FREQUENCY_2,
            .syn_ele      = TYPE_LFE,
            .elem_id      = layout_map[i][1],
            .aac_position = AAC_CHANNEL_LFE
        };
        layout |= e2c_vec[i].av_position;
        i++;
    }
    while (i < tags && layout_map[i][2] == AAC_CHANNEL_LFE) {
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position  = UINT64_MAX,
            .syn_ele      = TYPE_LFE,
            .elem_id      = layout_map[i][1],
            .aac_position = AAC_CHANNEL_LFE
        };
        i++;
    }

    // The previous checks would end up at 8 at this point for 22.2
    if (layout == PREFIX_FOR_22POINT2 && tags == 16 && i == 8) {
        const uint8_t (*reference_layout_map)[3] = aac_channel_layout_map[12];
        /* Only continue if element types and positions of all 16 entries
         * match the reference map; element ids are not compared. */
        for (int j = 0; j < tags; j++) {
            if (layout_map[j][0] != reference_layout_map[j][0] ||
                layout_map[j][2] != reference_layout_map[j][2])
                goto end_of_layout_definition;
        }

        /* Assign the remaining eight entries: top and bottom channels. */
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position  = AV_CH_TOP_FRONT_CENTER,
            .syn_ele      = layout_map[i][0],
            .elem_id      = layout_map[i][1],
            .aac_position = layout_map[i][2]
        }; layout |= e2c_vec[i].av_position; i++;
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_TOP_FRONT_LEFT,
                         AV_CH_TOP_FRONT_RIGHT,
                         AAC_CHANNEL_FRONT,
                         &layout);
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_TOP_SIDE_LEFT,
                         AV_CH_TOP_SIDE_RIGHT,
                         AAC_CHANNEL_SIDE,
                         &layout);
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position  = AV_CH_TOP_CENTER,
            .syn_ele      = layout_map[i][0],
            .elem_id      = layout_map[i][1],
            .aac_position = layout_map[i][2]
        }; layout |= e2c_vec[i].av_position; i++;
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_TOP_BACK_LEFT,
                         AV_CH_TOP_BACK_RIGHT,
                         AAC_CHANNEL_BACK,
                         &layout);
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position  = AV_CH_TOP_BACK_CENTER,
            .syn_ele      = layout_map[i][0],
            .elem_id      = layout_map[i][1],
            .aac_position = layout_map[i][2]
        }; layout |= e2c_vec[i].av_position; i++;
        e2c_vec[i] = (struct elem_to_channel) {
            .av_position  = AV_CH_BOTTOM_FRONT_CENTER,
            .syn_ele      = layout_map[i][0],
            .elem_id      = layout_map[i][1],
            .aac_position = layout_map[i][2]
        }; layout |= e2c_vec[i].av_position; i++;
        i += assign_pair(e2c_vec, layout_map, i,
                         AV_CH_BOTTOM_FRONT_LEFT,
                         AV_CH_BOTTOM_FRONT_RIGHT,
                         AAC_CHANNEL_FRONT,
                         &layout);
    }

end_of_layout_definition:

    /* i is now the number of entries that received an e2c_vec record. */
    total_non_cc_elements = n = i;

    if (layout == AV_CH_LAYOUT_22POINT2) {
        // For 22.2 reorder the result as needed
        FFSWAP(struct elem_to_channel, e2c_vec[2], e2c_vec[0]);   // FL & FR first (final), FC third
        FFSWAP(struct elem_to_channel, e2c_vec[2], e2c_vec[1]);   // FC second (final), FLc & FRc third
        FFSWAP(struct elem_to_channel, e2c_vec[6], e2c_vec[2]);   // LFE1 third (final), FLc & FRc seventh
        FFSWAP(struct elem_to_channel, e2c_vec[4], e2c_vec[3]);   // BL & BR fourth (final), SiL & SiR fifth
        FFSWAP(struct elem_to_channel, e2c_vec[6], e2c_vec[4]);   // FLc & FRc fifth (final), SiL & SiR seventh
        FFSWAP(struct elem_to_channel, e2c_vec[7], e2c_vec[6]);   // LFE2 seventh (final), SiL & SiR eighth (final)
        FFSWAP(struct elem_to_channel, e2c_vec[9], e2c_vec[8]);   // TpFL & TpFR ninth (final), TFC tenth (final)
        FFSWAP(struct elem_to_channel, e2c_vec[11], e2c_vec[10]); // TC eleventh (final), TpSiL & TpSiR twelfth
        FFSWAP(struct elem_to_channel, e2c_vec[12], e2c_vec[11]); // TpBL & TpBR twelfth (final), TpSiL & TpSiR thirteenth (final)
    } else {
        // For everything else, utilize the AV channel position define as a
        // stable sort.
        /* Bubble sort on av_position: each pass only needs to run up to the
         * last swap of the previous one. Entries holding UINT64_MAX end up
         * last. */
        do {
            int next_n = 0;
            for (i = 1; i < n; i++)
                if (e2c_vec[i - 1].av_position > e2c_vec[i].av_position) {
                    FFSWAP(struct elem_to_channel, e2c_vec[i - 1], e2c_vec[i]);
                    next_n = i;
                }
            n = next_n;
        } while (n > 0);

    }

    /* Write the reordered elements back into the caller's layout map. */
    for (i = 0; i < total_non_cc_elements; i++) {
        layout_map[i][0] = e2c_vec[i].syn_ele;
        layout_map[i][1] = e2c_vec[i].elem_id;
        layout_map[i][2] = e2c_vec[i].aac_position;
    }

    return layout;
}
499 
500 /**
501  * Save current output configuration if and only if it has been locked.
502  */
push_output_configuration(AACContext * ac)503 static int push_output_configuration(AACContext *ac) {
504     int pushed = 0;
505 
506     if (ac->oc[1].status == OC_LOCKED || ac->oc[0].status == OC_NONE) {
507         ac->oc[0] = ac->oc[1];
508         pushed = 1;
509     }
510     ac->oc[1].status = OC_NONE;
511     return pushed;
512 }
513 
514 /**
515  * Restore the previous output configuration if and only if the current
516  * configuration is unlocked.
517  */
pop_output_configuration(AACContext * ac)518 static void pop_output_configuration(AACContext *ac) {
519     if (ac->oc[1].status != OC_LOCKED && ac->oc[0].status != OC_NONE) {
520         ac->oc[1] = ac->oc[0];
521         ac->avctx->ch_layout = ac->oc[1].ch_layout;
522         output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
523                          ac->oc[1].status, 0);
524     }
525 }
526 
527 /**
528  * Configure output channel order based on the current program
529  * configuration element.
530  *
531  * @return  Returns error status. 0 - OK, !0 - error
532  */
output_configure(AACContext * ac,uint8_t layout_map[MAX_ELEM_ID * 4][3],int tags,enum OCStatus oc_type,int get_new_frame)533 static int output_configure(AACContext *ac,
534                             uint8_t layout_map[MAX_ELEM_ID * 4][3], int tags,
535                             enum OCStatus oc_type, int get_new_frame)
536 {
537     AVCodecContext *avctx = ac->avctx;
538     int i, channels = 0, ret;
539     uint64_t layout = 0;
540     uint8_t id_map[TYPE_END][MAX_ELEM_ID] = {{ 0 }};
541     uint8_t type_counts[TYPE_END] = { 0 };
542 
543     if (ac->oc[1].layout_map != layout_map) {
544         memcpy(ac->oc[1].layout_map, layout_map, tags * sizeof(layout_map[0]));
545         ac->oc[1].layout_map_tags = tags;
546     }
547     for (i = 0; i < tags; i++) {
548         int type =         layout_map[i][0];
549         int id =           layout_map[i][1];
550         id_map[type][id] = type_counts[type]++;
551         if (id_map[type][id] >= MAX_ELEM_ID) {
552             avpriv_request_sample(ac->avctx, "Too large remapped id");
553             return AVERROR_PATCHWELCOME;
554         }
555     }
556     // Try to sniff a reasonable channel order, otherwise output the
557     // channels in the order the PCE declared them.
558 #if FF_API_OLD_CHANNEL_LAYOUT
559 FF_DISABLE_DEPRECATION_WARNINGS
560     if (avctx->request_channel_layout == AV_CH_LAYOUT_NATIVE)
561         ac->output_channel_order = CHANNEL_ORDER_CODED;
562 FF_ENABLE_DEPRECATION_WARNINGS
563 #endif
564 
565     if (ac->output_channel_order == CHANNEL_ORDER_DEFAULT)
566         layout = sniff_channel_order(layout_map, tags);
567     for (i = 0; i < tags; i++) {
568         int type =     layout_map[i][0];
569         int id =       layout_map[i][1];
570         int iid =      id_map[type][id];
571         int position = layout_map[i][2];
572         // Allocate or free elements depending on if they are in the
573         // current program configuration.
574         ret = che_configure(ac, position, type, iid, &channels);
575         if (ret < 0)
576             return ret;
577         ac->tag_che_map[type][id] = ac->che[type][iid];
578     }
579     if (ac->oc[1].m4ac.ps == 1 && channels == 2) {
580         if (layout == AV_CH_FRONT_CENTER) {
581             layout = AV_CH_FRONT_LEFT|AV_CH_FRONT_RIGHT;
582         } else {
583             layout = 0;
584         }
585     }
586 
587     av_channel_layout_uninit(&ac->oc[1].ch_layout);
588     if (layout)
589         av_channel_layout_from_mask(&ac->oc[1].ch_layout, layout);
590     else {
591         ac->oc[1].ch_layout.order       = AV_CHANNEL_ORDER_UNSPEC;
592         ac->oc[1].ch_layout.nb_channels = channels;
593     }
594 
595     av_channel_layout_copy(&avctx->ch_layout, &ac->oc[1].ch_layout);
596     ac->oc[1].status = oc_type;
597 
598     if (get_new_frame) {
599         if ((ret = frame_configure_elements(ac->avctx)) < 0)
600             return ret;
601     }
602 
603     return 0;
604 }
605 
/**
 * Reset per-channel decoder history: clears the saved buffer of both
 * channels of every allocated channel element (and, when OHOS_OPT_COMPAT
 * is defined, the matching SBR channel data as well).
 */
static void flush(AVCodecContext *avctx)
{
    AACContext *ac = avctx->priv_data;

    for (int type = 3; type >= 0; type--) {
        for (int id = 0; id < MAX_ELEM_ID; id++) {
            ChannelElement *che = ac->che[type][id];

            if (!che)
                continue;

            for (int k = 0; k < 2; k++) {
                memset(che->ch[k].saved, 0, sizeof(che->ch[k].saved));
#ifdef OHOS_OPT_COMPAT
                memset(&che->sbr.data[k], 0, sizeof(che->sbr.data[k]));
#endif
            }
        }
    }
}
625 
626 /**
627  * Set up channel positions based on a default channel configuration
628  * as specified in table 1.17.
629  *
630  * @return  Returns error status. 0 - OK, !0 - error
631  */
set_default_channel_config(AACContext * ac,AVCodecContext * avctx,uint8_t (* layout_map)[3],int * tags,int channel_config)632 static int set_default_channel_config(AACContext *ac, AVCodecContext *avctx,
633                                       uint8_t (*layout_map)[3],
634                                       int *tags,
635                                       int channel_config)
636 {
637     if (channel_config < 1 || (channel_config > 7 && channel_config < 11) ||
638         channel_config > 13) {
639         av_log(avctx, AV_LOG_ERROR,
640                "invalid default channel configuration (%d)\n",
641                channel_config);
642         return AVERROR_INVALIDDATA;
643     }
644     *tags = tags_per_config[channel_config];
645     memcpy(layout_map, aac_channel_layout_map[channel_config - 1],
646            *tags * sizeof(*layout_map));
647 
648     /*
649      * AAC specification has 7.1(wide) as a default layout for 8-channel streams.
650      * However, at least Nero AAC encoder encodes 7.1 streams using the default
651      * channel config 7, mapping the side channels of the original audio stream
652      * to the second AAC_CHANNEL_FRONT pair in the AAC stream. Similarly, e.g. FAAD
653      * decodes the second AAC_CHANNEL_FRONT pair as side channels, therefore decoding
654      * the incorrect streams as if they were correct (and as the encoder intended).
655      *
656      * As actual intended 7.1(wide) streams are very rare, default to assuming a
657      * 7.1 layout was intended.
658      */
659     if (channel_config == 7 && avctx->strict_std_compliance < FF_COMPLIANCE_STRICT) {
660         layout_map[2][2] = AAC_CHANNEL_SIDE;
661 
662         if (!ac || !ac->warned_71_wide++) {
663             av_log(avctx, AV_LOG_INFO, "Assuming an incorrectly encoded 7.1 channel layout"
664                    " instead of a spec-compliant 7.1(wide) layout, use -strict %d to decode"
665                    " according to the specification instead.\n", FF_COMPLIANCE_STRICT);
666         }
667     }
668 
669     return 0;
670 }
671 
/**
 * Look up the channel element that a syntactic element read from the
 * bitstream should decode into.
 *
 * With chan_config == 0 the tag map is used directly. Otherwise elements
 * are matched by their order of appearance (ac->tags_mapped) and type, and
 * the tag map is filled in along the way. A mono configuration carrying a
 * CPE, or a stereo configuration carrying an SCE as first element, triggers
 * a trial reconfiguration of the output.
 *
 * @param   ac      decoder context
 * @param   type    syntactic element type
 * @param   elem_id element instance tag
 *
 * @return  The matching channel element, or NULL if none matches or the
 *          reconfiguration failed.
 */
static ChannelElement *get_che(AACContext *ac, int type, int elem_id)
{
    /* For PCE based channel configurations map the channels solely based
     * on tags. */
    if (!ac->oc[1].m4ac.chan_config) {
        return ac->tag_che_map[type][elem_id];
    }
    // Allow single CPE stereo files to be signalled with mono configuration.
    if (!ac->tags_mapped && type == TYPE_CPE &&
        ac->oc[1].m4ac.chan_config == 1) {
        uint8_t layout_map[MAX_ELEM_ID*4][3];
        int layout_map_tags;
        push_output_configuration(ac);

        av_log(ac->avctx, AV_LOG_DEBUG, "mono with CPE\n");

        if (set_default_channel_config(ac, ac->avctx, layout_map,
                                       &layout_map_tags, 2) < 0)
            return NULL;
        if (output_configure(ac, layout_map, layout_map_tags,
                             OC_TRIAL_FRAME, 1) < 0)
            return NULL;

        /* From here on behave as a plain stereo configuration. */
        ac->oc[1].m4ac.chan_config = 2;
        ac->oc[1].m4ac.ps = 0;
    }
    // And vice-versa
    if (!ac->tags_mapped && type == TYPE_SCE &&
        ac->oc[1].m4ac.chan_config == 2) {
        uint8_t layout_map[MAX_ELEM_ID * 4][3];
        int layout_map_tags;
        push_output_configuration(ac);

        av_log(ac->avctx, AV_LOG_DEBUG, "stereo with SCE\n");

        /* Hand-built map: two front SCEs with ids 0 and 1. */
        layout_map_tags = 2;
        layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
        layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
        layout_map[0][1] = 0;
        layout_map[1][1] = 1;
        if (output_configure(ac, layout_map, layout_map_tags,
                             OC_TRIAL_FRAME, 1) < 0)
            return NULL;

        if (ac->oc[1].m4ac.sbr)
            ac->oc[1].m4ac.ps = -1;
    }
    /* For indexed channel configurations map the channels solely based
     * on position. */
    /* Every case below deliberately falls through to the next one when its
     * condition does not match, so larger configurations reuse the rules of
     * the smaller ones. */
    switch (ac->oc[1].m4ac.chan_config) {
    case 13:
        if (ac->tags_mapped > 3 && ((type == TYPE_CPE && elem_id < 8) ||
                                    (type == TYPE_SCE && elem_id < 6) ||
                                    (type == TYPE_LFE && elem_id < 2))) {
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[type][elem_id];
        }
        /* fallthrough */
    case 12:
    case 7:
        if (ac->tags_mapped == 3 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][2];
        }
        /* fallthrough */
    case 11:
        if (ac->tags_mapped == 3 && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
        /* fallthrough */
    case 6:
        /* Some streams incorrectly code 5.1 audio as
         * SCE[0] CPE[0] CPE[1] SCE[1]
         * instead of
         * SCE[0] CPE[0] CPE[1] LFE[0].
         * If we seem to have encountered such a stream, transfer
         * the LFE[0] element to the SCE[1]'s mapping */
        if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
            if (!ac->warned_remapping_once && (type != TYPE_LFE || elem_id != 0)) {
                av_log(ac->avctx, AV_LOG_WARNING,
                   "This stream seems to incorrectly report its last channel as %s[%d], mapping to LFE[0]\n",
                   type == TYPE_SCE ? "SCE" : "LFE", elem_id);
                ac->warned_remapping_once++;
            }
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_LFE][0];
        }
        /* fallthrough */
    case 5:
        if (ac->tags_mapped == 2 && type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][1];
        }
        /* fallthrough */
    case 4:
        /* Some streams incorrectly code 4.0 audio as
         * SCE[0] CPE[0] LFE[0]
         * instead of
         * SCE[0] CPE[0] SCE[1].
         * If we seem to have encountered such a stream, transfer
         * the SCE[1] element to the LFE[0]'s mapping */
        if (ac->tags_mapped == tags_per_config[ac->oc[1].m4ac.chan_config] - 1 && (type == TYPE_LFE || type == TYPE_SCE)) {
            if (!ac->warned_remapping_once && (type != TYPE_SCE || elem_id != 1)) {
                av_log(ac->avctx, AV_LOG_WARNING,
                   "This stream seems to incorrectly report its last channel as %s[%d], mapping to SCE[1]\n",
                   type == TYPE_SCE ? "SCE" : "LFE", elem_id);
                ac->warned_remapping_once++;
            }
            ac->tags_mapped++;
            return ac->tag_che_map[type][elem_id] = ac->che[TYPE_SCE][1];
        }
        if (ac->tags_mapped == 2 &&
            ac->oc[1].m4ac.chan_config == 4 &&
            type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
        /* fallthrough */
    case 3:
    case 2:
        /* The CPE is the first element for config 2 and the second one
         * (after the leading SCE) for every other configuration. */
        if (ac->tags_mapped == (ac->oc[1].m4ac.chan_config != 2) &&
            type == TYPE_CPE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_CPE][elem_id] = ac->che[TYPE_CPE][0];
        } else if (ac->tags_mapped == 1 && ac->oc[1].m4ac.chan_config == 2 &&
            type == TYPE_SCE) {
            /* Second SCE of a stereo configuration coded as two SCEs. */
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][1];
        }
        /* fallthrough */
    case 1:
        if (!ac->tags_mapped && type == TYPE_SCE) {
            ac->tags_mapped++;
            return ac->tag_che_map[TYPE_SCE][elem_id] = ac->che[TYPE_SCE][0];
        }
        /* fallthrough */
    default:
        return NULL;
    }
}
805 
806 /**
807  * Decode an array of 4 bit element IDs, optionally interleaved with a
808  * stereo/mono switching bit.
809  *
810  * @param type speaker type/position for these channels
811  */
decode_channel_map(uint8_t layout_map[][3],enum ChannelPosition type,GetBitContext * gb,int n)812 static void decode_channel_map(uint8_t layout_map[][3],
813                                enum ChannelPosition type,
814                                GetBitContext *gb, int n)
815 {
816     while (n--) {
817         enum RawDataBlockType syn_ele;
818         switch (type) {
819         case AAC_CHANNEL_FRONT:
820         case AAC_CHANNEL_BACK:
821         case AAC_CHANNEL_SIDE:
822             syn_ele = get_bits1(gb);
823             break;
824         case AAC_CHANNEL_CC:
825             skip_bits1(gb);
826             syn_ele = TYPE_CCE;
827             break;
828         case AAC_CHANNEL_LFE:
829             syn_ele = TYPE_LFE;
830             break;
831         default:
832             // AAC_CHANNEL_OFF has no channel map
833             av_assert0(0);
834         }
835         layout_map[0][0] = syn_ele;
836         layout_map[0][1] = get_bits(gb, 4);
837         layout_map[0][2] = type;
838         layout_map++;
839     }
840 }
841 
/**
 * Skip up to 7 bits so that the distance between the reader position and
 * reference_position becomes a multiple of 8 bits.
 *
 * @param gb                 bitstream reader to advance
 * @param reference_position bit position the alignment is relative to
 */
static inline void relative_align_get_bits(GetBitContext *gb,
                                           int reference_position) {
    const int misalign = (reference_position - get_bits_count(gb)) & 7;

    if (misalign != 0)
        skip_bits(gb, misalign);
}
848 
849 /**
850  * Decode program configuration element; reference: table 4.2.
851  *
852  * @return  Returns error status. 0 - OK, !0 - error
853  */
decode_pce(AVCodecContext * avctx,MPEG4AudioConfig * m4ac,uint8_t (* layout_map)[3],GetBitContext * gb,int byte_align_ref)854 static int decode_pce(AVCodecContext *avctx, MPEG4AudioConfig *m4ac,
855                       uint8_t (*layout_map)[3],
856                       GetBitContext *gb, int byte_align_ref)
857 {
858     int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc;
859     int sampling_index;
860     int comment_len;
861     int tags;
862 
863     skip_bits(gb, 2);  // object_type
864 
865     sampling_index = get_bits(gb, 4);
866     if (m4ac->sampling_index != sampling_index)
867         av_log(avctx, AV_LOG_WARNING,
868                "Sample rate index in program config element does not "
869                "match the sample rate index configured by the container.\n");
870 
871     num_front       = get_bits(gb, 4);
872     num_side        = get_bits(gb, 4);
873     num_back        = get_bits(gb, 4);
874     num_lfe         = get_bits(gb, 2);
875     num_assoc_data  = get_bits(gb, 3);
876     num_cc          = get_bits(gb, 4);
877 
878     if (get_bits1(gb))
879         skip_bits(gb, 4); // mono_mixdown_tag
880     if (get_bits1(gb))
881         skip_bits(gb, 4); // stereo_mixdown_tag
882 
883     if (get_bits1(gb))
884         skip_bits(gb, 3); // mixdown_coeff_index and pseudo_surround
885 
886     if (get_bits_left(gb) < 5 * (num_front + num_side + num_back + num_cc) + 4 *(num_lfe + num_assoc_data + num_cc)) {
887         av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
888         return -1;
889     }
890     decode_channel_map(layout_map       , AAC_CHANNEL_FRONT, gb, num_front);
891     tags = num_front;
892     decode_channel_map(layout_map + tags, AAC_CHANNEL_SIDE,  gb, num_side);
893     tags += num_side;
894     decode_channel_map(layout_map + tags, AAC_CHANNEL_BACK,  gb, num_back);
895     tags += num_back;
896     decode_channel_map(layout_map + tags, AAC_CHANNEL_LFE,   gb, num_lfe);
897     tags += num_lfe;
898 
899     skip_bits_long(gb, 4 * num_assoc_data);
900 
901     decode_channel_map(layout_map + tags, AAC_CHANNEL_CC,    gb, num_cc);
902     tags += num_cc;
903 
904     relative_align_get_bits(gb, byte_align_ref);
905 
906     /* comment field, first byte is length */
907     comment_len = get_bits(gb, 8) * 8;
908     if (get_bits_left(gb) < comment_len) {
909         av_log(avctx, AV_LOG_ERROR, "decode_pce: " overread_err);
910         return AVERROR_INVALIDDATA;
911     }
912     skip_bits_long(gb, comment_len);
913     return tags;
914 }
915 
916 /**
917  * Decode GA "General Audio" specific configuration; reference: table 4.1.
918  *
919  * @param   ac          pointer to AACContext, may be null
920  * @param   avctx       pointer to AVCCodecContext, used for logging
921  *
922  * @return  Returns error status. 0 - OK, !0 - error
923  */
decode_ga_specific_config(AACContext * ac,AVCodecContext * avctx,GetBitContext * gb,int get_bit_alignment,MPEG4AudioConfig * m4ac,int channel_config)924 static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx,
925                                      GetBitContext *gb,
926                                      int get_bit_alignment,
927                                      MPEG4AudioConfig *m4ac,
928                                      int channel_config)
929 {
930     int extension_flag, ret, ep_config, res_flags;
931     uint8_t layout_map[MAX_ELEM_ID*4][3];
932     int tags = 0;
933 
934 #if USE_FIXED
935     if (get_bits1(gb)) { // frameLengthFlag
936         avpriv_report_missing_feature(avctx, "Fixed point 960/120 MDCT window");
937         return AVERROR_PATCHWELCOME;
938     }
939     m4ac->frame_length_short = 0;
940 #else
941     m4ac->frame_length_short = get_bits1(gb);
942     if (m4ac->frame_length_short && m4ac->sbr == 1) {
943       avpriv_report_missing_feature(avctx, "SBR with 960 frame length");
944       if (ac) ac->warned_960_sbr = 1;
945       m4ac->sbr = 0;
946       m4ac->ps = 0;
947     }
948 #endif
949 
950     if (get_bits1(gb))       // dependsOnCoreCoder
951         skip_bits(gb, 14);   // coreCoderDelay
952     extension_flag = get_bits1(gb);
953 
954     if (m4ac->object_type == AOT_AAC_SCALABLE ||
955         m4ac->object_type == AOT_ER_AAC_SCALABLE)
956         skip_bits(gb, 3);     // layerNr
957 
958     if (channel_config == 0) {
959         skip_bits(gb, 4);  // element_instance_tag
960         tags = decode_pce(avctx, m4ac, layout_map, gb, get_bit_alignment);
961         if (tags < 0)
962             return tags;
963     } else {
964         if ((ret = set_default_channel_config(ac, avctx, layout_map,
965                                               &tags, channel_config)))
966             return ret;
967     }
968 
969     if (count_channels(layout_map, tags) > 1) {
970         m4ac->ps = 0;
971     } else if (m4ac->sbr == 1 && m4ac->ps == -1)
972         m4ac->ps = 1;
973 
974     if (ac && (ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0)))
975         return ret;
976 
977     if (extension_flag) {
978         switch (m4ac->object_type) {
979         case AOT_ER_BSAC:
980             skip_bits(gb, 5);    // numOfSubFrame
981             skip_bits(gb, 11);   // layer_length
982             break;
983         case AOT_ER_AAC_LC:
984         case AOT_ER_AAC_LTP:
985         case AOT_ER_AAC_SCALABLE:
986         case AOT_ER_AAC_LD:
987             res_flags = get_bits(gb, 3);
988             if (res_flags) {
989                 avpriv_report_missing_feature(avctx,
990                                               "AAC data resilience (flags %x)",
991                                               res_flags);
992                 return AVERROR_PATCHWELCOME;
993             }
994             break;
995         }
996         skip_bits1(gb);    // extensionFlag3 (TBD in version 3)
997     }
998     switch (m4ac->object_type) {
999     case AOT_ER_AAC_LC:
1000     case AOT_ER_AAC_LTP:
1001     case AOT_ER_AAC_SCALABLE:
1002     case AOT_ER_AAC_LD:
1003         ep_config = get_bits(gb, 2);
1004         if (ep_config) {
1005             avpriv_report_missing_feature(avctx,
1006                                           "epConfig %d", ep_config);
1007             return AVERROR_PATCHWELCOME;
1008         }
1009     }
1010     return 0;
1011 }
1012 
/**
 * Decode the ELD specific configuration (used for AOT_ER_AAC_ELD).
 *
 * SBR and PS are always cleared in m4ac. Data resilience flags, Low Delay
 * SBR and a non-zero epConfig are reported as missing features.
 *
 * @param   ac              pointer to AACContext; output is only configured
 *                          when it is non-null
 * @param   avctx           pointer to AVCCodecContext, used for logging
 * @param   gb              bitstream reader positioned at the configuration
 * @param   m4ac            audio config updated while parsing
 * @param   channel_config  channel configuration used for the default layout
 *
 * @return  Returns error status. 0 - OK, !0 - error
 */
static int decode_eld_specific_config(AACContext *ac, AVCodecContext *avctx,
                                     GetBitContext *gb,
                                     MPEG4AudioConfig *m4ac,
                                     int channel_config)
{
    const int ELDEXT_TERM = 0;
    uint8_t layout_map[MAX_ELEM_ID*4][3];
    int tags = 0;
    int ret, ep_config, res_flags;

    m4ac->ps  = 0;
    m4ac->sbr = 0;
#if USE_FIXED
    if (get_bits1(gb)) { // frameLengthFlag
        avpriv_request_sample(avctx, "960/120 MDCT window");
        return AVERROR_PATCHWELCOME;
    }
#else
    m4ac->frame_length_short = get_bits1(gb);
#endif

    res_flags = get_bits(gb, 3);
    if (res_flags) {
        avpriv_report_missing_feature(avctx,
                                      "AAC data resilience (flags %x)",
                                      res_flags);
        return AVERROR_PATCHWELCOME;
    }

    if (get_bits1(gb)) { // ldSbrPresentFlag
        avpriv_report_missing_feature(avctx,
                                      "Low Delay SBR");
        return AVERROR_PATCHWELCOME;
    }

    /* Skip every extension payload until the terminator type is read. */
    for (;;) {
        int ext_len;

        if (get_bits(gb, 4) == ELDEXT_TERM)
            break;

        /* the length is escaped: 4 bits, then 8 more, then 16 more */
        ext_len = get_bits(gb, 4);
        if (ext_len == 15)
            ext_len += get_bits(gb, 8);
        if (ext_len == 15 + 255)
            ext_len += get_bits(gb, 16);

        /* the payload plus the next 4-bit extension type must fit */
        if (get_bits_left(gb) < ext_len * 8 + 4) {
            av_log(avctx, AV_LOG_ERROR, overread_err);
            return AVERROR_INVALIDDATA;
        }
        skip_bits_long(gb, 8 * ext_len);
    }

    ret = set_default_channel_config(ac, avctx, layout_map,
                                     &tags, channel_config);
    if (ret)
        return ret;

    if (ac) {
        ret = output_configure(ac, layout_map, tags, OC_GLOBAL_HDR, 0);
        if (ret)
            return ret;
    }

    ep_config = get_bits(gb, 2);
    if (ep_config) {
        avpriv_report_missing_feature(avctx,
                                      "epConfig %d", ep_config);
        return AVERROR_PATCHWELCOME;
    }
    return 0;
}
1075 
1076 /**
1077  * Decode audio specific configuration; reference: table 1.13.
1078  *
1079  * @param   ac          pointer to AACContext, may be null
1080  * @param   avctx       pointer to AVCCodecContext, used for logging
1081  * @param   m4ac        pointer to MPEG4AudioConfig, used for parsing
1082  * @param   gb          buffer holding an audio specific config
1083  * @param   get_bit_alignment relative alignment for byte align operations
1084  * @param   sync_extension look for an appended sync extension
1085  *
1086  * @return  Returns error status or number of consumed bits. <0 - error
1087  */
decode_audio_specific_config_gb(AACContext * ac,AVCodecContext * avctx,MPEG4AudioConfig * m4ac,GetBitContext * gb,int get_bit_alignment,int sync_extension)1088 static int decode_audio_specific_config_gb(AACContext *ac,
1089                                            AVCodecContext *avctx,
1090                                            MPEG4AudioConfig *m4ac,
1091                                            GetBitContext *gb,
1092                                            int get_bit_alignment,
1093                                            int sync_extension)
1094 {
1095     int i, ret;
1096     GetBitContext gbc = *gb;
1097     MPEG4AudioConfig m4ac_bak = *m4ac;
1098 
1099     if ((i = ff_mpeg4audio_get_config_gb(m4ac, &gbc, sync_extension, avctx)) < 0) {
1100         *m4ac = m4ac_bak;
1101         return AVERROR_INVALIDDATA;
1102     }
1103 
1104     if (m4ac->sampling_index > 12) {
1105         av_log(avctx, AV_LOG_ERROR,
1106                "invalid sampling rate index %d\n",
1107                m4ac->sampling_index);
1108         *m4ac = m4ac_bak;
1109         return AVERROR_INVALIDDATA;
1110     }
1111     if (m4ac->object_type == AOT_ER_AAC_LD &&
1112         (m4ac->sampling_index < 3 || m4ac->sampling_index > 7)) {
1113         av_log(avctx, AV_LOG_ERROR,
1114                "invalid low delay sampling rate index %d\n",
1115                m4ac->sampling_index);
1116         *m4ac = m4ac_bak;
1117         return AVERROR_INVALIDDATA;
1118     }
1119 
1120     skip_bits_long(gb, i);
1121 
1122     switch (m4ac->object_type) {
1123     case AOT_AAC_MAIN:
1124     case AOT_AAC_LC:
1125     case AOT_AAC_SSR:
1126     case AOT_AAC_LTP:
1127     case AOT_ER_AAC_LC:
1128     case AOT_ER_AAC_LD:
1129         if ((ret = decode_ga_specific_config(ac, avctx, gb, get_bit_alignment,
1130                                             m4ac, m4ac->chan_config)) < 0)
1131             return ret;
1132         break;
1133     case AOT_ER_AAC_ELD:
1134         if ((ret = decode_eld_specific_config(ac, avctx, gb,
1135                                               m4ac, m4ac->chan_config)) < 0)
1136             return ret;
1137         break;
1138     default:
1139         avpriv_report_missing_feature(avctx,
1140                                       "Audio object type %s%d",
1141                                       m4ac->sbr == 1 ? "SBR+" : "",
1142                                       m4ac->object_type);
1143         return AVERROR(ENOSYS);
1144     }
1145 
1146     ff_dlog(avctx,
1147             "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
1148             m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
1149             m4ac->sample_rate, m4ac->sbr,
1150             m4ac->ps);
1151 
1152     return get_bits_count(gb);
1153 }
1154 
/**
 * Decode an audio specific configuration held in a byte buffer.
 *
 * Sets up a bit reader over data and forwards to
 * decode_audio_specific_config_gb() with an alignment reference of 0.
 *
 * @param   ac             pointer to AACContext, forwarded unchanged
 * @param   avctx          pointer to AVCCodecContext, used for logging
 * @param   m4ac           pointer to MPEG4AudioConfig, used for parsing
 * @param   data           buffer holding the configuration
 * @param   bit_size       size of the configuration in bits; must lie in
 *                         the range [0, INT_MAX]
 * @param   sync_extension look for an appended sync extension
 *
 * @return  result of decode_audio_specific_config_gb(), or a negative
 *          error code if the size is invalid or the reader cannot be set up
 */
static int decode_audio_specific_config(AACContext *ac,
                                        AVCodecContext *avctx,
                                        MPEG4AudioConfig *m4ac,
                                        const uint8_t *data, int64_t bit_size,
                                        int sync_extension)
{
    GetBitContext gb;
    int nb_bytes, ret;

    if (!(bit_size >= 0 && bit_size <= INT_MAX)) {
        av_log(avctx, AV_LOG_ERROR, "Audio specific config size is invalid\n");
        return AVERROR_INVALIDDATA;
    }

    /* debug dump of the raw configuration bytes */
    nb_bytes = (int)bit_size >> 3;
    ff_dlog(avctx, "audio specific config size %d\n", nb_bytes);
    for (int pos = 0; pos < nb_bytes; pos++)
        ff_dlog(avctx, "%02x ", data[pos]);
    ff_dlog(avctx, "\n");

    ret = init_get_bits(&gb, data, bit_size);
    if (ret < 0)
        return ret;

    return decode_audio_specific_config_gb(ac, avctx, m4ac, &gb, 0,
                                           sync_extension);
}
1180 
1181 /**
1182  * linear congruential pseudorandom number generator
1183  *
1184  * @param   previous_val    pointer to the current state of the generator
1185  *
1186  * @return  Returns a 32-bit pseudorandom integer
1187  */
lcg_random(unsigned previous_val)1188 static av_always_inline int lcg_random(unsigned previous_val)
1189 {
1190     union { unsigned u; int s; } v = { previous_val * 1664525u + 1013904223 };
1191     return v.s;
1192 }
1193 
/**
 * Reset each of the MAX_PREDICTORS predictor states in ps.
 *
 * @param ps array of at least MAX_PREDICTORS predictor states
 */
static void reset_all_predictors(PredictorState *ps)
{
    PredictorState *const end = ps + MAX_PREDICTORS;

    for (PredictorState *cur = ps; cur != end; cur++)
        reset_predict_state(cur);
}
1200 
/**
 * Map a sample rate to an index in the range 0..11.
 *
 * The index is the position of the first threshold that rate reaches;
 * rates below the smallest threshold (9391) map to 11.
 *
 * @param rate sample rate
 *
 * @return index between 0 and 11
 */
static int sample_rate_idx (int rate)
{
    /* lower bounds of the rate ranges for indices 0..10, descending */
    static const int lower_bound[] = {
        92017, 75132, 55426, 46009, 37566, 27713,
        23004, 18783, 13856, 11502,  9391,
    };
    const int nb_bounds = (int)(sizeof(lower_bound) / sizeof(lower_bound[0]));
    int idx = 0;

    while (idx < nb_bounds && rate < lower_bound[idx])
        idx++;

    return idx;
}
1216 
/**
 * Reset every 30th predictor state, starting at index group_num - 1.
 *
 * @param ps        array of at least MAX_PREDICTORS predictor states
 * @param group_num 1-based group number; no range check is done here
 */
static void reset_predictor_group(PredictorState *ps, int group_num)
{
    int idx = group_num - 1;

    while (idx < MAX_PREDICTORS) {
        reset_predict_state(&ps[idx]);
        idx += 30;
    }
}
1223 
1224 static void aacdec_init(AACContext *ac);
1225 
aac_static_table_init(void)1226 static av_cold void aac_static_table_init(void)
1227 {
1228     static VLCElem vlc_buf[304 + 270 + 550 + 300 + 328 +
1229                            294 + 306 + 268 + 510 + 366 + 462];
1230     for (unsigned i = 0, offset = 0; i < 11; i++) {
1231         vlc_spectral[i].table           = &vlc_buf[offset];
1232         vlc_spectral[i].table_allocated = FF_ARRAY_ELEMS(vlc_buf) - offset;
1233         ff_init_vlc_sparse(&vlc_spectral[i], 8, ff_aac_spectral_sizes[i],
1234                            ff_aac_spectral_bits[i],       sizeof(ff_aac_spectral_bits[i][0]),
1235                                                           sizeof(ff_aac_spectral_bits[i][0]),
1236                            ff_aac_spectral_codes[i],      sizeof(ff_aac_spectral_codes[i][0]),
1237                                                           sizeof(ff_aac_spectral_codes[i][0]),
1238                            ff_aac_codebook_vector_idx[i], sizeof(ff_aac_codebook_vector_idx[i][0]),
1239                                                           sizeof(ff_aac_codebook_vector_idx[i][0]),
1240                  INIT_VLC_STATIC_OVERLONG);
1241         offset += vlc_spectral[i].table_size;
1242     }
1243 
1244     AAC_RENAME(ff_aac_sbr_init)();
1245 
1246     ff_aac_tableinit();
1247 
1248     INIT_VLC_STATIC(&vlc_scalefactors, 7,
1249                     FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
1250                     ff_aac_scalefactor_bits,
1251                     sizeof(ff_aac_scalefactor_bits[0]),
1252                     sizeof(ff_aac_scalefactor_bits[0]),
1253                     ff_aac_scalefactor_code,
1254                     sizeof(ff_aac_scalefactor_code[0]),
1255                     sizeof(ff_aac_scalefactor_code[0]),
1256                     352);
1257 
1258     // window initialization
1259 #if !USE_FIXED
1260     AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_long_960), 4.0, 960);
1261     AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_short_120), 6.0, 120);
1262     AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_960), 960);
1263     AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_120), 120);
1264     AAC_RENAME(ff_init_ff_sine_windows)(9);
1265     ff_aac_float_common_init();
1266 #else
1267     AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_long_1024), 4.0, 1024);
1268     AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_short_128), 6.0, 128);
1269     init_sine_windows_fixed();
1270 #endif
1271 
1272     AAC_RENAME(ff_cbrt_tableinit)();
1273 }
1274 
1275 static AVOnce aac_table_init = AV_ONCE_INIT;
1276 
aac_decode_init(AVCodecContext * avctx)1277 static av_cold int aac_decode_init(AVCodecContext *avctx)
1278 {
1279     AACContext *ac = avctx->priv_data;
1280     int ret;
1281 
1282     if (avctx->sample_rate > 96000)
1283         return AVERROR_INVALIDDATA;
1284 
1285     ret = ff_thread_once(&aac_table_init, &aac_static_table_init);
1286     if (ret != 0)
1287         return AVERROR_UNKNOWN;
1288 
1289     ac->avctx = avctx;
1290     ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
1291 
1292     aacdec_init(ac);
1293 #if USE_FIXED
1294     avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
1295 #else
1296     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
1297 #endif /* USE_FIXED */
1298 
1299     if (avctx->extradata_size > 0) {
1300         if ((ret = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
1301                                                 avctx->extradata,
1302                                                 avctx->extradata_size * 8LL,
1303                                                 1)) < 0)
1304             return ret;
1305     } else {
1306         int sr, i;
1307         uint8_t layout_map[MAX_ELEM_ID*4][3];
1308         int layout_map_tags;
1309 
1310         sr = sample_rate_idx(avctx->sample_rate);
1311         ac->oc[1].m4ac.sampling_index = sr;
1312         ac->oc[1].m4ac.channels = avctx->ch_layout.nb_channels;
1313         ac->oc[1].m4ac.sbr = -1;
1314         ac->oc[1].m4ac.ps = -1;
1315 
1316         for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
1317             if (ff_mpeg4audio_channels[i] == avctx->ch_layout.nb_channels)
1318                 break;
1319         if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
1320             i = 0;
1321         }
1322         ac->oc[1].m4ac.chan_config = i;
1323 
1324         if (ac->oc[1].m4ac.chan_config) {
1325             int ret = set_default_channel_config(ac, avctx, layout_map,
1326                 &layout_map_tags, ac->oc[1].m4ac.chan_config);
1327             if (!ret)
1328                 output_configure(ac, layout_map, layout_map_tags,
1329                                  OC_GLOBAL_HDR, 0);
1330             else if (avctx->err_recognition & AV_EF_EXPLODE)
1331                 return AVERROR_INVALIDDATA;
1332         }
1333     }
1334 
1335     if (avctx->ch_layout.nb_channels > MAX_CHANNELS) {
1336         av_log(avctx, AV_LOG_ERROR, "Too many channels\n");
1337         return AVERROR_INVALIDDATA;
1338     }
1339 
1340 #if USE_FIXED
1341     ac->fdsp = avpriv_alloc_fixed_dsp(avctx->flags & AV_CODEC_FLAG_BITEXACT);
1342 #else
1343     ac->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
1344 #endif /* USE_FIXED */
1345     if (!ac->fdsp) {
1346         return AVERROR(ENOMEM);
1347     }
1348 
1349     ac->random_state = 0x1f2e3d4c;
1350 
1351     AAC_RENAME_32(ff_mdct_init)(&ac->mdct,       11, 1, 1.0 / RANGE15(1024.0));
1352     AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ld,    10, 1, 1.0 / RANGE15(512.0));
1353     AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small,  8, 1, 1.0 / RANGE15(128.0));
1354     AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp,   11, 0, RANGE15(-2.0));
1355 #if !USE_FIXED
1356     ret = ff_mdct15_init(&ac->mdct120, 1, 3, 1.0f/(16*1024*120*2));
1357     if (ret < 0)
1358         return ret;
1359     ret = ff_mdct15_init(&ac->mdct480, 1, 5, 1.0f/(16*1024*960));
1360     if (ret < 0)
1361         return ret;
1362     ret = ff_mdct15_init(&ac->mdct960, 1, 6, 1.0f/(16*1024*960*2));
1363     if (ret < 0)
1364         return ret;
1365 #endif
1366 
1367     return 0;
1368 }
1369 
1370 /**
1371  * Skip data_stream_element; reference: table 4.10.
1372  */
skip_data_stream_element(AACContext * ac,GetBitContext * gb)1373 static int skip_data_stream_element(AACContext *ac, GetBitContext *gb)
1374 {
1375     int byte_align = get_bits1(gb);
1376     int count = get_bits(gb, 8);
1377     if (count == 255)
1378         count += get_bits(gb, 8);
1379     if (byte_align)
1380         align_get_bits(gb);
1381 
1382     if (get_bits_left(gb) < 8 * count) {
1383         av_log(ac->avctx, AV_LOG_ERROR, "skip_data_stream_element: "overread_err);
1384         return AVERROR_INVALIDDATA;
1385     }
1386     skip_bits_long(gb, 8 * count);
1387     return 0;
1388 }
1389 
/**
 * Decode the prediction data of an individual channel stream.
 *
 * Reads an optional 5-bit predictor reset group (valid values 1..30) and
 * then one prediction_used flag per scalefactor band, limited by both
 * ics->max_sfb and the ff_aac_pred_sfb_max entry for the current sampling
 * index.
 *
 * @param ac  decoder context (logging, sampling index)
 * @param ics channel stream info that receives the decoded fields
 * @param gb  bitstream reader
 *
 * @return 0 on success, AVERROR_INVALIDDATA on an invalid reset group
 */
static int decode_prediction(AACContext *ac, IndividualChannelStream *ics,
                             GetBitContext *gb)
{
    int nb_pred_sfb;

    if (get_bits1(gb)) {
        const int reset_group = get_bits(gb, 5);

        ics->predictor_reset_group = reset_group;
        if (reset_group < 1 || reset_group > 30) {
            av_log(ac->avctx, AV_LOG_ERROR,
                   "Invalid Predictor Reset Group.\n");
            return AVERROR_INVALIDDATA;
        }
    }

    nb_pred_sfb = FFMIN(ics->max_sfb,
                        ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index]);
    for (int band = 0; band < nb_pred_sfb; band++)
        ics->prediction_used[band] = get_bits1(gb);

    return 0;
}
1408 
1409 /**
1410  * Decode Long Term Prediction data; reference: table 4.xx.
1411  */
decode_ltp(LongTermPrediction * ltp,GetBitContext * gb,uint8_t max_sfb)1412 static void decode_ltp(LongTermPrediction *ltp,
1413                        GetBitContext *gb, uint8_t max_sfb)
1414 {
1415     int sfb;
1416 
1417     ltp->lag  = get_bits(gb, 11);
1418     ltp->coef = ltp_coef[get_bits(gb, 3)];
1419     for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
1420         ltp->used[sfb] = get_bits1(gb);
1421 }
1422 
/**
 * Decode Individual Channel Stream info; reference: table 4.6.
 *
 * Reads the window sequence and window shape (skipped for ELD), max_sfb,
 * the window grouping for eight-short sequences, and the predictor / LTP
 * side info for long windows. It also selects the scalefactor band tables
 * that match the object type, frame length and sampling index.
 *
 * @param ac  decoder context (configuration, logging)
 * @param ics channel stream info to fill in
 * @param gb  bitstream reader
 *
 * @return 0 on success, a negative error code on failure. Failures that
 *         go through the fail label also reset ics->max_sfb to 0.
 */
static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
                           GetBitContext *gb)
{
    const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac;
    const int aot = m4ac->object_type;
    const int sampling_index = m4ac->sampling_index;
    /* error code returned by the fail path unless a branch overrides it */
    int ret_fail = AVERROR_INVALIDDATA;

    if (aot != AOT_ER_AAC_ELD) {
        /* the reserved bit is only fatal when AV_EF_BITSTREAM is set */
        if (get_bits1(gb)) {
            av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
            if (ac->avctx->err_recognition & AV_EF_BITSTREAM)
                return AVERROR_INVALIDDATA;
        }
        /* slot [1] keeps the previous value, slot [0] gets the new one */
        ics->window_sequence[1] = ics->window_sequence[0];
        ics->window_sequence[0] = get_bits(gb, 2);
        if (aot == AOT_ER_AAC_LD &&
            ics->window_sequence[0] != ONLY_LONG_SEQUENCE) {
            av_log(ac->avctx, AV_LOG_ERROR,
                   "AAC LD is only defined for ONLY_LONG_SEQUENCE but "
                   "window sequence %d found.\n", ics->window_sequence[0]);
            /* this return bypasses the fail label, max_sfb is left as is */
            ics->window_sequence[0] = ONLY_LONG_SEQUENCE;
            return AVERROR_INVALIDDATA;
        }
        ics->use_kb_window[1]   = ics->use_kb_window[0];
        ics->use_kb_window[0]   = get_bits1(gb);
    }
    ics->num_window_groups  = 1;
    ics->group_len[0]       = 1;
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
        int i;
        ics->max_sfb = get_bits(gb, 4);
        /* 7 grouping bits: a set bit extends the current group by one
         * window, a cleared bit opens a new group of length 1 */
        for (i = 0; i < 7; i++) {
            if (get_bits1(gb)) {
                ics->group_len[ics->num_window_groups - 1]++;
            } else {
                ics->num_window_groups++;
                ics->group_len[ics->num_window_groups - 1] = 1;
            }
        }
        ics->num_windows       = 8;
        if (m4ac->frame_length_short) {
            ics->swb_offset    =  ff_swb_offset_120[sampling_index];
            ics->num_swb       = ff_aac_num_swb_120[sampling_index];
        } else {
            ics->swb_offset    =  ff_swb_offset_128[sampling_index];
            ics->num_swb       = ff_aac_num_swb_128[sampling_index];
        }
        /* the 128 table is used for both frame lengths here */
        ics->tns_max_bands     = ff_tns_max_bands_128[sampling_index];
        ics->predictor_present = 0;
    } else {
        ics->max_sfb           = get_bits(gb, 6);
        ics->num_windows       = 1;
        if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD) {
            if (m4ac->frame_length_short) {
                ics->swb_offset    =     ff_swb_offset_480[sampling_index];
                ics->num_swb       =    ff_aac_num_swb_480[sampling_index];
                ics->tns_max_bands =  ff_tns_max_bands_480[sampling_index];
            } else {
                ics->swb_offset    =     ff_swb_offset_512[sampling_index];
                ics->num_swb       =    ff_aac_num_swb_512[sampling_index];
                ics->tns_max_bands =  ff_tns_max_bands_512[sampling_index];
            }
            /* an empty table entry for this sampling index is reported as
             * an internal error */
            if (!ics->num_swb || !ics->swb_offset) {
                ret_fail = AVERROR_BUG;
                goto fail;
            }
        } else {
            if (m4ac->frame_length_short) {
                ics->num_swb    = ff_aac_num_swb_960[sampling_index];
                ics->swb_offset = ff_swb_offset_960[sampling_index];
            } else {
                ics->num_swb    = ff_aac_num_swb_1024[sampling_index];
                ics->swb_offset = ff_swb_offset_1024[sampling_index];
            }
            ics->tns_max_bands = ff_tns_max_bands_1024[sampling_index];
        }
        /* for ELD no predictor bit is read, so predictor_present keeps
         * whatever value it had before this call */
        if (aot != AOT_ER_AAC_ELD) {
            ics->predictor_present     = get_bits1(gb);
            ics->predictor_reset_group = 0;
        }
        if (ics->predictor_present) {
            if (aot == AOT_AAC_MAIN) {
                if (decode_prediction(ac, ics, gb)) {
                    goto fail;
                }
            } else if (aot == AOT_AAC_LC ||
                       aot == AOT_ER_AAC_LC) {
                av_log(ac->avctx, AV_LOG_ERROR,
                       "Prediction is not allowed in AAC-LC.\n");
                goto fail;
            } else {
                if (aot == AOT_ER_AAC_LD) {
                    av_log(ac->avctx, AV_LOG_ERROR,
                           "LTP in ER AAC LD not yet implemented.\n");
                    ret_fail = AVERROR_PATCHWELCOME;
                    goto fail;
                }
                /* remaining object types: one bit signals LTP data */
                if ((ics->ltp.present = get_bits(gb, 1)))
                    decode_ltp(&ics->ltp, gb, ics->max_sfb);
            }
        }
    }

    /* max_sfb must not exceed the band count of the selected table */
    if (ics->max_sfb > ics->num_swb) {
        av_log(ac->avctx, AV_LOG_ERROR,
               "Number of scalefactor bands in group (%d) "
               "exceeds limit (%d).\n",
               ics->max_sfb, ics->num_swb);
        goto fail;
    }

    return 0;
fail:
    /* the failed stream is left with no active scalefactor bands */
    ics->max_sfb = 0;
    return ret_fail;
}
1543 
1544 /**
1545  * Decode band types (section_data payload); reference: table 4.46.
1546  *
1547  * @param   band_type           array of the used band type
1548  * @param   band_type_run_end   array of the last scalefactor band of a band type run
1549  *
1550  * @return  Returns error status. 0 - OK, !0 - error
1551  */
decode_band_types(AACContext * ac,enum BandType band_type[120],int band_type_run_end[120],GetBitContext * gb,IndividualChannelStream * ics)1552 static int decode_band_types(AACContext *ac, enum BandType band_type[120],
1553                              int band_type_run_end[120], GetBitContext *gb,
1554                              IndividualChannelStream *ics)
1555 {
1556     int g, idx = 0;
1557     const int bits = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 3 : 5;
1558     for (g = 0; g < ics->num_window_groups; g++) {
1559         int k = 0;
1560         while (k < ics->max_sfb) {
1561             uint8_t sect_end = k;
1562             int sect_len_incr;
1563             int sect_band_type = get_bits(gb, 4);
1564             if (sect_band_type == 12) {
1565                 av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n");
1566                 return AVERROR_INVALIDDATA;
1567             }
1568             do {
1569                 sect_len_incr = get_bits(gb, bits);
1570                 sect_end += sect_len_incr;
1571                 if (get_bits_left(gb) < 0) {
1572                     av_log(ac->avctx, AV_LOG_ERROR, "decode_band_types: "overread_err);
1573                     return AVERROR_INVALIDDATA;
1574                 }
1575                 if (sect_end > ics->max_sfb) {
1576                     av_log(ac->avctx, AV_LOG_ERROR,
1577                            "Number of bands (%d) exceeds limit (%d).\n",
1578                            sect_end, ics->max_sfb);
1579                     return AVERROR_INVALIDDATA;
1580                 }
1581             } while (sect_len_incr == (1 << bits) - 1);
1582             for (; k < sect_end; k++) {
1583                 band_type        [idx]   = sect_band_type;
1584                 band_type_run_end[idx++] = sect_end;
1585             }
1586         }
1587     }
1588     return 0;
1589 }
1590 
1591 /**
1592  * Decode scalefactors; reference: table 4.47.
1593  *
1594  * @param   global_gain         first scalefactor value as scalefactors are differentially coded
1595  * @param   band_type           array of the used band type
1596  * @param   band_type_run_end   array of the last scalefactor band of a band type run
1597  * @param   sf                  array of scalefactors or intensity stereo positions
1598  *
1599  * @return  Returns error status. 0 - OK, !0 - error
1600  */
decode_scalefactors(AACContext * ac,INTFLOAT sf[120],GetBitContext * gb,unsigned int global_gain,IndividualChannelStream * ics,enum BandType band_type[120],int band_type_run_end[120])1601 static int decode_scalefactors(AACContext *ac, INTFLOAT sf[120], GetBitContext *gb,
1602                                unsigned int global_gain,
1603                                IndividualChannelStream *ics,
1604                                enum BandType band_type[120],
1605                                int band_type_run_end[120])
1606 {
1607     int g, i, idx = 0;
1608     int offset[3] = { global_gain, global_gain - NOISE_OFFSET, 0 };
1609     int clipped_offset;
1610     int noise_flag = 1;
1611     for (g = 0; g < ics->num_window_groups; g++) {
1612         for (i = 0; i < ics->max_sfb;) {
1613             int run_end = band_type_run_end[idx];
1614             if (band_type[idx] == ZERO_BT) {
1615                 for (; i < run_end; i++, idx++)
1616                     sf[idx] = FIXR(0.);
1617             } else if ((band_type[idx] == INTENSITY_BT) ||
1618                        (band_type[idx] == INTENSITY_BT2)) {
1619                 for (; i < run_end; i++, idx++) {
1620                     offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
1621                     clipped_offset = av_clip(offset[2], -155, 100);
1622                     if (offset[2] != clipped_offset) {
1623                         avpriv_request_sample(ac->avctx,
1624                                               "If you heard an audible artifact, there may be a bug in the decoder. "
1625                                               "Clipped intensity stereo position (%d -> %d)",
1626                                               offset[2], clipped_offset);
1627                     }
1628 #if USE_FIXED
1629                     sf[idx] = 100 - clipped_offset;
1630 #else
1631                     sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
1632 #endif /* USE_FIXED */
1633                 }
1634             } else if (band_type[idx] == NOISE_BT) {
1635                 for (; i < run_end; i++, idx++) {
1636                     if (noise_flag-- > 0)
1637                         offset[1] += get_bits(gb, NOISE_PRE_BITS) - NOISE_PRE;
1638                     else
1639                         offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
1640                     clipped_offset = av_clip(offset[1], -100, 155);
1641                     if (offset[1] != clipped_offset) {
1642                         avpriv_request_sample(ac->avctx,
1643                                               "If you heard an audible artifact, there may be a bug in the decoder. "
1644                                               "Clipped noise gain (%d -> %d)",
1645                                               offset[1], clipped_offset);
1646                     }
1647 #if USE_FIXED
1648                     sf[idx] = -(100 + clipped_offset);
1649 #else
1650                     sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
1651 #endif /* USE_FIXED */
1652                 }
1653             } else {
1654                 for (; i < run_end; i++, idx++) {
1655                     offset[0] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - SCALE_DIFF_ZERO;
1656                     if (offset[0] > 255U) {
1657                         av_log(ac->avctx, AV_LOG_ERROR,
1658                                "Scalefactor (%d) out of range.\n", offset[0]);
1659                         return AVERROR_INVALIDDATA;
1660                     }
1661 #if USE_FIXED
1662                     sf[idx] = -offset[0];
1663 #else
1664                     sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
1665 #endif /* USE_FIXED */
1666                 }
1667             }
1668         }
1669     }
1670     return 0;
1671 }
1672 
1673 /**
1674  * Decode pulse data; reference: table 4.7.
1675  */
decode_pulses(Pulse * pulse,GetBitContext * gb,const uint16_t * swb_offset,int num_swb)1676 static int decode_pulses(Pulse *pulse, GetBitContext *gb,
1677                          const uint16_t *swb_offset, int num_swb)
1678 {
1679     int i, pulse_swb;
1680     pulse->num_pulse = get_bits(gb, 2) + 1;
1681     pulse_swb        = get_bits(gb, 6);
1682     if (pulse_swb >= num_swb)
1683         return -1;
1684     pulse->pos[0]    = swb_offset[pulse_swb];
1685     pulse->pos[0]   += get_bits(gb, 5);
1686     if (pulse->pos[0] >= swb_offset[num_swb])
1687         return -1;
1688     pulse->amp[0]    = get_bits(gb, 4);
1689     for (i = 1; i < pulse->num_pulse; i++) {
1690         pulse->pos[i] = get_bits(gb, 5) + pulse->pos[i - 1];
1691         if (pulse->pos[i] >= swb_offset[num_swb])
1692             return -1;
1693         pulse->amp[i] = get_bits(gb, 4);
1694     }
1695     return 0;
1696 }
1697 
1698 /**
1699  * Decode Temporal Noise Shaping data; reference: table 4.48.
1700  *
1701  * @return  Returns error status. 0 - OK, !0 - error
1702  */
decode_tns(AACContext * ac,TemporalNoiseShaping * tns,GetBitContext * gb,const IndividualChannelStream * ics)1703 static int decode_tns(AACContext *ac, TemporalNoiseShaping *tns,
1704                       GetBitContext *gb, const IndividualChannelStream *ics)
1705 {
1706     int w, filt, i, coef_len, coef_res, coef_compress;
1707     const int is8 = ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE;
1708     const int tns_max_order = is8 ? 7 : ac->oc[1].m4ac.object_type == AOT_AAC_MAIN ? 20 : 12;
1709     for (w = 0; w < ics->num_windows; w++) {
1710         if ((tns->n_filt[w] = get_bits(gb, 2 - is8))) {
1711             coef_res = get_bits1(gb);
1712 
1713             for (filt = 0; filt < tns->n_filt[w]; filt++) {
1714                 int tmp2_idx;
1715                 tns->length[w][filt] = get_bits(gb, 6 - 2 * is8);
1716 
1717                 if ((tns->order[w][filt] = get_bits(gb, 5 - 2 * is8)) > tns_max_order) {
1718                     av_log(ac->avctx, AV_LOG_ERROR,
1719                            "TNS filter order %d is greater than maximum %d.\n",
1720                            tns->order[w][filt], tns_max_order);
1721                     tns->order[w][filt] = 0;
1722                     return AVERROR_INVALIDDATA;
1723                 }
1724                 if (tns->order[w][filt]) {
1725                     tns->direction[w][filt] = get_bits1(gb);
1726                     coef_compress = get_bits1(gb);
1727                     coef_len = coef_res + 3 - coef_compress;
1728                     tmp2_idx = 2 * coef_compress + coef_res;
1729 
1730                     for (i = 0; i < tns->order[w][filt]; i++)
1731                         tns->coef[w][filt][i] = tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
1732                 }
1733             }
1734         }
1735     }
1736     return 0;
1737 }
1738 
1739 /**
1740  * Decode Mid/Side data; reference: table 4.54.
1741  *
1742  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
1743  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
1744  *                      [3] reserved for scalable AAC
1745  */
decode_mid_side_stereo(ChannelElement * cpe,GetBitContext * gb,int ms_present)1746 static void decode_mid_side_stereo(ChannelElement *cpe, GetBitContext *gb,
1747                                    int ms_present)
1748 {
1749     int idx;
1750     int max_idx = cpe->ch[0].ics.num_window_groups * cpe->ch[0].ics.max_sfb;
1751     if (ms_present == 1) {
1752         for (idx = 0; idx < max_idx; idx++)
1753             cpe->ms_mask[idx] = get_bits1(gb);
1754     } else if (ms_present == 2) {
1755         memset(cpe->ms_mask, 1, max_idx * sizeof(cpe->ms_mask[0]));
1756     }
1757 }
1758 
1759 /**
1760  * Decode spectral data; reference: table 4.50.
1761  * Dequantize and scale spectral data; reference: 4.6.3.3.
1762  *
1763  * @param   coef            array of dequantized, scaled spectral data
 * @param   gb              bitstream reader the spectral codewords are read from
1764  * @param   sf              array of scalefactors or intensity stereo positions
1765  * @param   pulse_present   set if pulses are present
1766  * @param   pulse           pointer to pulse data struct
 * @param   ics             supplies the window/group layout, max_sfb and swb_offset
1767  * @param   band_type       array of the used band type
1768  *
1769  * @return  Returns error status. 0 - OK, !0 - error
 *          (AVERROR_INVALIDDATA when an escape prefix is longer than 8 bits)
1770  */
decode_spectrum_and_dequant(AACContext * ac,INTFLOAT coef[1024],GetBitContext * gb,const INTFLOAT sf[120],int pulse_present,const Pulse * pulse,const IndividualChannelStream * ics,enum BandType band_type[120])1771 static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024],
1772                                        GetBitContext *gb, const INTFLOAT sf[120],
1773                                        int pulse_present, const Pulse *pulse,
1774                                        const IndividualChannelStream *ics,
1775                                        enum BandType band_type[120])
1776 {
1777     int i, k, g, idx = 0;
         /* c: number of coefficients per window */
1778     const int c = 1024 / ics->num_windows;
1779     const uint16_t *offsets = ics->swb_offset;
         /* coef is advanced group by group below; keep the start for the
          * pulse step and the fixed-point post-processing pass */
1780     INTFLOAT *coef_base = coef;
1781 
         /* Clear each window from offsets[max_sfb] up to the window length c;
          * window g starts at coef + g * 128. */
1782     for (g = 0; g < ics->num_windows; g++)
1783         memset(coef + g * 128 + offsets[ics->max_sfb], 0,
1784                sizeof(INTFLOAT) * (c - offsets[ics->max_sfb]));
1785 
1786     for (g = 0; g < ics->num_window_groups; g++) {
1787         unsigned g_len = ics->group_len[g];
1788 
1789         for (i = 0; i < ics->max_sfb; i++, idx++) {
                 /* Unsigned on purpose: a band type of 0 wraps around and is
                  * caught by the ">=" test below, together with every band
                  * type >= INTENSITY_BT2. */
1790             const unsigned cbt_m1 = band_type[idx] - 1;
1791             INTFLOAT *cfo = coef + offsets[i];
1792             int off_len = offsets[i + 1] - offsets[i];
1793             int group;
1794 
1795             if (cbt_m1 >= INTENSITY_BT2 - 1) {
                     /* nothing is read from the bitstream: the band is
                      * cleared in every window of the group */
1796                 for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
1797                     memset(cfo, 0, off_len * sizeof(*cfo));
1798                 }
1799             } else if (cbt_m1 == NOISE_BT - 1) {
                     /* noise band: fill with values from lcg_random(), then
                      * rescale using the band energy and sf[idx] */
1800                 for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
1801                     INTFLOAT band_energy;
1802 #if USE_FIXED
1803                     for (k = 0; k < off_len; k++) {
1804                         ac->random_state  = lcg_random(ac->random_state);
1805                         cfo[k] = ac->random_state >> 3;
1806                     }
1807 
1808                     band_energy = ac->fdsp->scalarproduct_fixed(cfo, cfo, off_len);
1809                     band_energy = fixed_sqrt(band_energy, 31);
1810                     noise_scale(cfo, sf[idx], band_energy, off_len);
1811 #else
1812                     float scale;
1813 
1814                     for (k = 0; k < off_len; k++) {
1815                         ac->random_state  = lcg_random(ac->random_state);
1816                         cfo[k] = ac->random_state;
1817                     }
1818 
1819                     band_energy = ac->fdsp->scalarproduct_float(cfo, cfo, off_len);
1820                     scale = sf[idx] / sqrtf(band_energy);
1821                     ac->fdsp->vector_fmul_scalar(cfo, cfo, scale, off_len);
1822 #endif /* USE_FIXED */
1823                 }
1824             } else {
                     /* Remaining band types select a table in vlc_spectral[]
                      * (and, for float, in ff_aac_codebook_vector_vals[]).
                      * Codewords are read through the local bit reader that
                      * is opened here and closed after the switch. */
1825 #if !USE_FIXED
1826                 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
1827 #endif /* !USE_FIXED */
1828                 const VLCElem *vlc_tab = vlc_spectral[cbt_m1].table;
1829                 OPEN_READER(re, gb);
1830 
                     /* Band types are handled in pairs; the cases differ in
                      * how many values one codeword yields (4 or 2) and in
                      * whether extra bits follow the codeword. */
1831                 switch (cbt_m1 >> 1) {
1832                 case 0:
                         /* 4 values per codeword, no extra bits */
1833                     for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
1834                         INTFLOAT *cf = cfo;
1835                         int len = off_len;
1836 
1837                         do {
1838                             int code;
1839                             unsigned cb_idx;
1840 
1841                             UPDATE_CACHE(re, gb);
1842                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1843                             cb_idx = code;
1844 #if USE_FIXED
1845                             cf = DEC_SQUAD(cf, cb_idx);
1846 #else
1847                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
1848 #endif /* USE_FIXED */
1849                         } while (len -= 4);
1850                     }
1851                     break;
1852 
1853                 case 1:
                         /* 4 values per codeword; nnz (bits 8..11 of the
                          * code) extra bits are taken from the cache and
                          * then skipped */
1854                     for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
1855                         INTFLOAT *cf = cfo;
1856                         int len = off_len;
1857 
1858                         do {
1859                             int code;
1860                             unsigned nnz;
1861                             unsigned cb_idx;
1862                             uint32_t bits;
1863 
1864                             UPDATE_CACHE(re, gb);
1865                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1866                             cb_idx = code;
1867                             nnz = cb_idx >> 8 & 15;
1868                             bits = nnz ? GET_CACHE(re, gb) : 0;
1869                             LAST_SKIP_BITS(re, gb, nnz);
1870 #if USE_FIXED
1871                             cf = DEC_UQUAD(cf, cb_idx, bits);
1872 #else
1873                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1874 #endif /* USE_FIXED */
1875                         } while (len -= 4);
1876                     }
1877                     break;
1878 
1879                 case 2:
                         /* 2 values per codeword, no extra bits */
1880                     for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
1881                         INTFLOAT *cf = cfo;
1882                         int len = off_len;
1883 
1884                         do {
1885                             int code;
1886                             unsigned cb_idx;
1887 
1888                             UPDATE_CACHE(re, gb);
1889                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1890                             cb_idx = code;
1891 #if USE_FIXED
1892                             cf = DEC_SPAIR(cf, cb_idx);
1893 #else
1894                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
1895 #endif /* USE_FIXED */
1896                         } while (len -= 2);
1897                     }
1898                     break;
1899 
1900                 case 3:
1901                 case 4:
                         /* 2 values per codeword followed by nnz extra bits,
                          * shifted left by the top nibble of the code */
1902                     for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
1903                         INTFLOAT *cf = cfo;
1904                         int len = off_len;
1905 
1906                         do {
1907                             int code;
1908                             unsigned nnz;
1909                             unsigned cb_idx;
1910                             unsigned sign;
1911 
1912                             UPDATE_CACHE(re, gb);
1913                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1914                             cb_idx = code;
1915                             nnz = cb_idx >> 8 & 15;
1916                             sign = nnz ? SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12) : 0;
1917                             LAST_SKIP_BITS(re, gb, nnz);
1918 #if USE_FIXED
1919                             cf = DEC_UPAIR(cf, cb_idx, sign);
1920 #else
1921                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1922 #endif /* USE_FIXED */
1923                         } while (len -= 2);
1924                     }
1925                     break;
1926 
1927                 default:
                         /* 2 values per codeword, each of which may be coded
                          * as an escape sequence. The float build writes raw
                          * 32-bit patterns through icf and applies sf[idx]
                          * to the whole band afterwards. */
1928                     for (group = 0; group < (AAC_SIGNE)g_len; group++, cfo+=128) {
1929 #if USE_FIXED
1930                         int *icf = cfo;
1931                         int v;
1932 #else
1933                         float *cf = cfo;
1934                         uint32_t *icf = (uint32_t *) cf;
1935 #endif /* USE_FIXED */
1936                         int len = off_len;
1937 
1938                         do {
1939                             int code;
1940                             unsigned nzt, nnz;
1941                             unsigned cb_idx;
1942                             uint32_t bits;
1943                             int j;
1944 
1945                             UPDATE_CACHE(re, gb);
1946                             GET_VLC(code, re, gb, vlc_tab, 8, 2);
1947                             cb_idx = code;
1948 
                                 /* Both values are zero. The "continue" jumps
                                  * to the "while (len -= 2)" test, so len is
                                  * still decremented. */
1949                             if (cb_idx == 0x0000) {
1950                                 *icf++ = 0;
1951                                 *icf++ = 0;
1952                                 continue;
1953                             }
1954 
                                 /* nnz: number of bits fetched after the
                                  * codeword; bits 8 and 9 of cb_idx (tested
                                  * as nzt & 1<<j) select the escape path for
                                  * value j. The fetched bits are left-aligned
                                  * so that bit 31 is consumed first. */
1955                             nnz = cb_idx >> 12;
1956                             nzt = cb_idx >> 8;
1957                             bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1958                             LAST_SKIP_BITS(re, gb, nnz);
1959 
1960                             for (j = 0; j < 2; j++) {
1961                                 if (nzt & 1<<j) {
1962                                     uint32_t b;
1963                                     int n;
1964                                     /* The total length of escape_sequence must be < 22 bits according
1965                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1966                                     UPDATE_CACHE(re, gb);
1967                                     b = GET_CACHE(re, gb);
                                         /* b becomes the number of leading 1
                                          * bits in the cache (assuming
                                          * av_log2() returns the index of the
                                          * highest set bit -- TODO confirm) */
1968                                     b = 31 - av_log2(~b);
1969 
1970                                     if (b > 8) {
1971                                         av_log(ac->avctx, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1972                                         return AVERROR_INVALIDDATA;
1973                                     }
1974 
                                         /* skip the prefix plus one more bit,
                                          * then read a (b + 4)-bit field that
                                          * is added to 1 << (b + 4) */
1975                                     SKIP_BITS(re, gb, b + 1);
1976                                     b += 4;
1977                                     n = (1 << b) + SHOW_UBITS(re, gb, b);
1978                                     LAST_SKIP_BITS(re, gb, b);
1979 #if USE_FIXED
1980                                     v = n;
1981                                     if (bits & 1U<<31)
1982                                         v = -v;
1983                                     *icf++ = v;
1984 #else
1985                                     *icf++ = ff_cbrt_tab[n] | (bits & 1U<<31);
1986 #endif /* USE_FIXED */
1987                                     bits <<= 1;
1988                                 } else {
1989 #if USE_FIXED
1990                                     v = cb_idx & 15;
1991                                     if (bits & 1U<<31)
1992                                         v = -v;
1993                                     *icf++ = v;
1994 #else
1995                                     unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1996                                     *icf++ = (bits & 1U<<31) | v;
1997 #endif /* USE_FIXED */
                                         /* a sign bit is consumed only when
                                          * v is non-zero */
1998                                     bits <<= !!v;
1999                                 }
2000                                 cb_idx >>= 4;
2001                             }
2002                         } while (len -= 2);
2003 #if !USE_FIXED
2004                         ac->fdsp->vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
2005 #endif /* !USE_FIXED */
2006                     }
2007                 }
2008 
2009                 CLOSE_READER(re, gb);
2010             }
2011         }
             /* advance past the g_len windows (128 coefficients each) of
              * this group */
2012         coef += g_len << 7;
2013     }
2014 
         /* Add the pulse amplitudes at the pulse positions. Bands of type
          * NOISE_BT and bands whose scalefactor is zero are left alone. */
2015     if (pulse_present) {
2016         idx = 0;
2017         for (i = 0; i < pulse->num_pulse; i++) {
2018             INTFLOAT co = coef_base[ pulse->pos[i] ];
                 /* find the scalefactor band that contains this position */
2019             while (offsets[idx + 1] <= pulse->pos[i])
2020                 idx++;
2021             if (band_type[idx] != NOISE_BT && sf[idx]) {
2022                 INTFLOAT ico = -pulse->amp[i];
2023 #if USE_FIXED
2024                 if (co) {
2025                     ico = co + (co > 0 ? -ico : ico);
2026                 }
2027                 coef_base[ pulse->pos[i] ] = ico;
2028 #else
                     /* Float coefficients are already scaled: remove sf[idx],
                      * map back with sign(co) * |co|^(3/4), add the pulse
                      * with the sign of co, then apply |x|^(1/3) * x and
                      * sf[idx] again. */
2029                 if (co) {
2030                     co /= sf[idx];
2031                     ico = co / sqrtf(sqrtf(fabsf(co))) + (co > 0 ? -ico : ico);
2032                 }
2033                 coef_base[ pulse->pos[i] ] = cbrtf(fabsf(ico)) * ico * sf[idx];
2034 #endif /* USE_FIXED */
2035             }
2036         }
2037     }
2038 #if USE_FIXED
         /* Fixed-point build only: second pass over all bands with
          * cbt_m1 < NOISE_BT - 1, applying vector_pow43() and then
          * subband_scale() with sf[idx] to every window of the group. */
2039     coef = coef_base;
2040     idx = 0;
2041     for (g = 0; g < ics->num_window_groups; g++) {
2042         unsigned g_len = ics->group_len[g];
2043 
2044         for (i = 0; i < ics->max_sfb; i++, idx++) {
2045             const unsigned cbt_m1 = band_type[idx] - 1;
2046             int *cfo = coef + offsets[i];
2047             int off_len = offsets[i + 1] - offsets[i];
2048             int group;
2049 
2050             if (cbt_m1 < NOISE_BT - 1) {
2051                 for (group = 0; group < (int)g_len; group++, cfo+=128) {
2052                     ac->vector_pow43(cfo, off_len);
2053                     ac->subband_scale(cfo, cfo, sf[idx], 34, off_len, ac->avctx);
2054                 }
2055             }
2056         }
2057         coef += g_len << 7;
2058     }
2059 #endif /* USE_FIXED */
2060     return 0;
2061 }
2062 
2063 /**
2064  * Apply AAC-Main style frequency domain prediction.
2065  */
apply_prediction(AACContext * ac,SingleChannelElement * sce)2066 static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
2067 {
2068     int sfb, k;
2069 
2070     if (!sce->ics.predictor_initialized) {
2071         reset_all_predictors(sce->predictor_state);
2072         sce->ics.predictor_initialized = 1;
2073     }
2074 
2075     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
2076         for (sfb = 0;
2077              sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index];
2078              sfb++) {
2079             for (k = sce->ics.swb_offset[sfb];
2080                  k < sce->ics.swb_offset[sfb + 1];
2081                  k++) {
2082                 predict(&sce->predictor_state[k], &sce->coeffs[k],
2083                         sce->ics.predictor_present &&
2084                         sce->ics.prediction_used[sfb]);
2085             }
2086         }
2087         if (sce->ics.predictor_reset_group)
2088             reset_predictor_group(sce->predictor_state,
2089                                   sce->ics.predictor_reset_group);
2090     } else
2091         reset_all_predictors(sce->predictor_state);
2092 }
2093 
/**
 * Parse and discard the gain control data.
 *
 * Nothing is stored: the function only moves the bitstream position past the
 * payload.
 */
static void decode_gain_control(SingleChannelElement * sce, GetBitContext * gb)
{
    // one row per window sequence: wd_num, wd_test, aloc_size
    static const uint8_t gain_mode[4][3] = {
        {1, 0, 5},  // ONLY_LONG_SEQUENCE = 0,
        {2, 1, 2},  // LONG_START_SEQUENCE,
        {8, 0, 2},  // EIGHT_SHORT_SEQUENCE,
        {2, 1, 5},  // LONG_STOP_SEQUENCE
    };

    const uint8_t *row    = gain_mode[sce->ics.window_sequence[0]];
    const int wd_num      = row[0];
    const int wd_test     = row[1];
    const int aloc_size   = row[2];
    int band, window, adjust;

    // FIXME: Store the gain control data on |sce| and do something with it.
    const int max_band = get_bits(gb, 2);

    for (band = 0; band < max_band; band++) {
        for (window = 0; window < wd_num; window++) {
            const int adjust_num = get_bits(gb, 3);
            // the first window uses a 4-bit location when wd_test is set
            const int loc_bits   = (window == 0 && wd_test) ? 4 : aloc_size;

            for (adjust = 0; adjust < adjust_num; adjust++)
                skip_bits(gb, 4 + loc_bits);
        }
    }
}
2120 
2121 /**
2122  * Decode an individual_channel_stream payload; reference: table 4.44.
2123  *
2124  * @param   common_window   Channels have independent [0], or shared [1], Individual Channel Stream information.
2125  * @param   scale_flag      scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
2126  *
2127  * @return  Returns error status. 0 - OK, !0 - error
2128  */
decode_ics(AACContext * ac,SingleChannelElement * sce,GetBitContext * gb,int common_window,int scale_flag)2129 static int decode_ics(AACContext *ac, SingleChannelElement *sce,
2130                       GetBitContext *gb, int common_window, int scale_flag)
2131 {
2132     Pulse pulse;
2133     TemporalNoiseShaping    *tns = &sce->tns;
2134     IndividualChannelStream *ics = &sce->ics;
2135     INTFLOAT *out = sce->coeffs;
2136     int global_gain, eld_syntax, er_syntax, pulse_present = 0;
2137     int ret;
2138 
2139     eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
2140     er_syntax  = ac->oc[1].m4ac.object_type == AOT_ER_AAC_LC ||
2141                  ac->oc[1].m4ac.object_type == AOT_ER_AAC_LTP ||
2142                  ac->oc[1].m4ac.object_type == AOT_ER_AAC_LD ||
2143                  ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
2144 
2145     /* This assignment is to silence a GCC warning about the variable being used
2146      * uninitialized when in fact it always is.
2147      */
2148     pulse.num_pulse = 0;
2149 
2150     global_gain = get_bits(gb, 8);
2151 
2152     if (!common_window && !scale_flag) {
2153         ret = decode_ics_info(ac, ics, gb);
2154         if (ret < 0)
2155             goto fail;
2156     }
2157 
2158     if ((ret = decode_band_types(ac, sce->band_type,
2159                                  sce->band_type_run_end, gb, ics)) < 0)
2160         goto fail;
2161     if ((ret = decode_scalefactors(ac, sce->sf, gb, global_gain, ics,
2162                                   sce->band_type, sce->band_type_run_end)) < 0)
2163         goto fail;
2164 
2165     pulse_present = 0;
2166     if (!scale_flag) {
2167         if (!eld_syntax && (pulse_present = get_bits1(gb))) {
2168             if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2169                 av_log(ac->avctx, AV_LOG_ERROR,
2170                        "Pulse tool not allowed in eight short sequence.\n");
2171                 ret = AVERROR_INVALIDDATA;
2172                 goto fail;
2173             }
2174             if (decode_pulses(&pulse, gb, ics->swb_offset, ics->num_swb)) {
2175                 av_log(ac->avctx, AV_LOG_ERROR,
2176                        "Pulse data corrupt or invalid.\n");
2177                 ret = AVERROR_INVALIDDATA;
2178                 goto fail;
2179             }
2180         }
2181         tns->present = get_bits1(gb);
2182         if (tns->present && !er_syntax) {
2183             ret = decode_tns(ac, tns, gb, ics);
2184             if (ret < 0)
2185                 goto fail;
2186         }
2187         if (!eld_syntax && get_bits1(gb)) {
2188             decode_gain_control(sce, gb);
2189             if (!ac->warned_gain_control) {
2190                 avpriv_report_missing_feature(ac->avctx, "Gain control");
2191                 ac->warned_gain_control = 1;
2192             }
2193         }
2194         // I see no textual basis in the spec for this occurring after SSR gain
2195         // control, but this is what both reference and real implmentations do
2196         if (tns->present && er_syntax) {
2197             ret = decode_tns(ac, tns, gb, ics);
2198             if (ret < 0)
2199                 goto fail;
2200         }
2201     }
2202 
2203     ret = decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present,
2204                                     &pulse, ics, sce->band_type);
2205     if (ret < 0)
2206         goto fail;
2207 
2208     if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN && !common_window)
2209         apply_prediction(ac, sce);
2210 
2211     return 0;
2212 fail:
2213     tns->present = 0;
2214     return ret;
2215 }
2216 
2217 /**
2218  * Mid/Side stereo decoding; reference: 4.6.8.1.3.
2219  */
apply_mid_side_stereo(AACContext * ac,ChannelElement * cpe)2220 static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe)
2221 {
2222     const IndividualChannelStream *ics = &cpe->ch[0].ics;
2223     INTFLOAT *ch0 = cpe->ch[0].coeffs;
2224     INTFLOAT *ch1 = cpe->ch[1].coeffs;
2225     int g, i, group, idx = 0;
2226     const uint16_t *offsets = ics->swb_offset;
2227     for (g = 0; g < ics->num_window_groups; g++) {
2228         for (i = 0; i < ics->max_sfb; i++, idx++) {
2229             if (cpe->ms_mask[idx] &&
2230                 cpe->ch[0].band_type[idx] < NOISE_BT &&
2231                 cpe->ch[1].band_type[idx] < NOISE_BT) {
2232 #if USE_FIXED
2233                 for (group = 0; group < ics->group_len[g]; group++) {
2234                     ac->fdsp->butterflies_fixed(ch0 + group * 128 + offsets[i],
2235                                                 ch1 + group * 128 + offsets[i],
2236                                                 offsets[i+1] - offsets[i]);
2237 #else
2238                 for (group = 0; group < ics->group_len[g]; group++) {
2239                     ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i],
2240                                                ch1 + group * 128 + offsets[i],
2241                                                offsets[i+1] - offsets[i]);
2242 #endif /* USE_FIXED */
2243                 }
2244             }
2245         }
2246         ch0 += ics->group_len[g] * 128;
2247         ch1 += ics->group_len[g] * 128;
2248     }
2249 }
2250 
2251 /**
2252  * intensity stereo decoding; reference: 4.6.8.2.3
2253  *
2254  * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
2255  *                      [1] mask is decoded from bitstream; [2] mask is all 1s;
2256  *                      [3] reserved for scalable AAC
2257  */
2258 static void apply_intensity_stereo(AACContext *ac,
2259                                    ChannelElement *cpe, int ms_present)
2260 {
2261     const IndividualChannelStream *ics = &cpe->ch[1].ics;
2262     SingleChannelElement         *sce1 = &cpe->ch[1];
2263     INTFLOAT *coef0 = cpe->ch[0].coeffs, *coef1 = cpe->ch[1].coeffs;
2264     const uint16_t *offsets = ics->swb_offset;
2265     int g, group, i, idx = 0;
2266     int c;
2267     INTFLOAT scale;
2268     for (g = 0; g < ics->num_window_groups; g++) {
2269         for (i = 0; i < ics->max_sfb;) {
2270             if (sce1->band_type[idx] == INTENSITY_BT ||
2271                 sce1->band_type[idx] == INTENSITY_BT2) {
2272                 const int bt_run_end = sce1->band_type_run_end[idx];
2273                 for (; i < bt_run_end; i++, idx++) {
2274                     c = -1 + 2 * (sce1->band_type[idx] - 14);
2275                     if (ms_present)
2276                         c *= 1 - 2 * cpe->ms_mask[idx];
2277                     scale = c * sce1->sf[idx];
2278                     for (group = 0; group < ics->group_len[g]; group++)
2279 #if USE_FIXED
2280                         ac->subband_scale(coef1 + group * 128 + offsets[i],
2281                                       coef0 + group * 128 + offsets[i],
2282                                       scale,
2283                                       23,
2284                                       offsets[i + 1] - offsets[i] ,ac->avctx);
2285 #else
2286                         ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + offsets[i],
2287                                                     coef0 + group * 128 + offsets[i],
2288                                                     scale,
2289                                                     offsets[i + 1] - offsets[i]);
2290 #endif /* USE_FIXED */
2291                 }
2292             } else {
2293                 int bt_run_end = sce1->band_type_run_end[idx];
2294                 idx += bt_run_end - i;
2295                 i    = bt_run_end;
2296             }
2297         }
2298         coef0 += ics->group_len[g] * 128;
2299         coef1 += ics->group_len[g] * 128;
2300     }
2301 }
2302 
2303 /**
2304  * Decode a channel_pair_element; reference: table 4.4.
2305  *
2306  * @return  Returns error status. 0 - OK, !0 - error
2307  */
2308 static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
2309 {
2310     int i, ret, common_window, ms_present = 0;
2311     int eld_syntax = ac->oc[1].m4ac.object_type == AOT_ER_AAC_ELD;
2312 
2313     common_window = eld_syntax || get_bits1(gb);
2314     if (common_window) {
2315         if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
2316             return AVERROR_INVALIDDATA;
2317         i = cpe->ch[1].ics.use_kb_window[0];
2318         cpe->ch[1].ics = cpe->ch[0].ics;
2319         cpe->ch[1].ics.use_kb_window[1] = i;
2320         if (cpe->ch[1].ics.predictor_present &&
2321             (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
2322             if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
2323                 decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
2324         ms_present = get_bits(gb, 2);
2325         if (ms_present == 3) {
2326             av_log(ac->avctx, AV_LOG_ERROR, "ms_present = 3 is reserved.\n");
2327             return AVERROR_INVALIDDATA;
2328         } else if (ms_present)
2329             decode_mid_side_stereo(cpe, gb, ms_present);
2330     }
2331     if ((ret = decode_ics(ac, &cpe->ch[0], gb, common_window, 0)))
2332         return ret;
2333     if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
2334         return ret;
2335 
2336     if (common_window) {
2337         if (ms_present)
2338             apply_mid_side_stereo(ac, cpe);
2339         if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
2340             apply_prediction(ac, &cpe->ch[0]);
2341             apply_prediction(ac, &cpe->ch[1]);
2342         }
2343     }
2344 
2345     apply_intensity_stereo(ac, cpe, ms_present);
2346     return 0;
2347 }
2348 
/* Gain scale bases, indexed by the 2-bit scale field read in decode_cce(). */
static const float cce_scale[] = {
    1.09050773266525765921, /* 2^(1/8) */
    1.18920711500272106672, /* 2^(1/4) */
    M_SQRT2,                /* 2^(1/2) */
    2,                      /* 2^1     */
};
2355 
2356 /**
2357  * Decode coupling_channel_element; reference: table 4.8.
2358  *
2359  * @return  Returns error status. 0 - OK, !0 - error
2360  */
2361 static int decode_cce(AACContext *ac, GetBitContext *gb, ChannelElement *che)
2362 {
2363     int num_gain = 0;
2364     int c, g, sfb, ret;
2365     int sign;
2366     INTFLOAT scale;
2367     SingleChannelElement *sce = &che->ch[0];
2368     ChannelCoupling     *coup = &che->coup;
2369 
2370     coup->coupling_point = 2 * get_bits1(gb);
2371     coup->num_coupled = get_bits(gb, 3);
2372     for (c = 0; c <= coup->num_coupled; c++) {
2373         num_gain++;
2374         coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
2375         coup->id_select[c] = get_bits(gb, 4);
2376         if (coup->type[c] == TYPE_CPE) {
2377             coup->ch_select[c] = get_bits(gb, 2);
2378             if (coup->ch_select[c] == 3)
2379                 num_gain++;
2380         } else
2381             coup->ch_select[c] = 2;
2382     }
2383     coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
2384 
2385     sign  = get_bits(gb, 1);
2386 #if USE_FIXED
2387     scale = get_bits(gb, 2);
2388 #else
2389     scale = cce_scale[get_bits(gb, 2)];
2390 #endif
2391 
2392     if ((ret = decode_ics(ac, sce, gb, 0, 0)))
2393         return ret;
2394 
2395     for (c = 0; c < num_gain; c++) {
2396         int idx  = 0;
2397         int cge  = 1;
2398         int gain = 0;
2399         INTFLOAT gain_cache = FIXR10(1.);
2400         if (c) {
2401             cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
2402             gain = cge ? get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60: 0;
2403             gain_cache = GET_GAIN(scale, gain);
2404 #if USE_FIXED
2405             if ((abs(gain_cache)-1024) >> 3 > 30)
2406                 return AVERROR(ERANGE);
2407 #endif
2408         }
2409         if (coup->coupling_point == AFTER_IMDCT) {
2410             coup->gain[c][0] = gain_cache;
2411         } else {
2412             for (g = 0; g < sce->ics.num_window_groups; g++) {
2413                 for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
2414                     if (sce->band_type[idx] != ZERO_BT) {
2415                         if (!cge) {
2416                             int t = get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
2417                             if (t) {
2418                                 int s = 1;
2419                                 t = gain += t;
2420                                 if (sign) {
2421                                     s  -= 2 * (t & 0x1);
2422                                     t >>= 1;
2423                                 }
2424                                 gain_cache = GET_GAIN(scale, t) * s;
2425 #if USE_FIXED
2426                                 if ((abs(gain_cache)-1024) >> 3 > 30)
2427                                     return AVERROR(ERANGE);
2428 #endif
2429                             }
2430                         }
2431                         coup->gain[c][idx] = gain_cache;
2432                     }
2433                 }
2434             }
2435         }
2436     }
2437     return 0;
2438 }
2439 
2440 /**
2441  * Parse whether channels are to be excluded from Dynamic Range Compression; reference: table 4.53.
2442  *
2443  * @return  Returns number of bytes consumed.
2444  */
2445 static int decode_drc_channel_exclusions(DynamicRangeControl *che_drc,
2446                                          GetBitContext *gb)
2447 {
2448     int i;
2449     int num_excl_chan = 0;
2450 
2451     do {
2452         for (i = 0; i < 7; i++)
2453             che_drc->exclude_mask[num_excl_chan++] = get_bits1(gb);
2454     } while (num_excl_chan < MAX_CHANNELS - 7 && get_bits1(gb));
2455 
2456     return num_excl_chan / 7;
2457 }
2458 
2459 /**
2460  * Decode dynamic range information; reference: table 4.52.
2461  *
2462  * @return  Returns number of bytes consumed.
2463  */
2464 static int decode_dynamic_range(DynamicRangeControl *che_drc,
2465                                 GetBitContext *gb)
2466 {
2467     int n             = 1;
2468     int drc_num_bands = 1;
2469     int i;
2470 
2471     /* pce_tag_present? */
2472     if (get_bits1(gb)) {
2473         che_drc->pce_instance_tag  = get_bits(gb, 4);
2474         skip_bits(gb, 4); // tag_reserved_bits
2475         n++;
2476     }
2477 
2478     /* excluded_chns_present? */
2479     if (get_bits1(gb)) {
2480         n += decode_drc_channel_exclusions(che_drc, gb);
2481     }
2482 
2483     /* drc_bands_present? */
2484     if (get_bits1(gb)) {
2485         che_drc->band_incr            = get_bits(gb, 4);
2486         che_drc->interpolation_scheme = get_bits(gb, 4);
2487         n++;
2488         drc_num_bands += che_drc->band_incr;
2489         for (i = 0; i < drc_num_bands; i++) {
2490             che_drc->band_top[i] = get_bits(gb, 8);
2491             n++;
2492         }
2493     }
2494 
2495     /* prog_ref_level_present? */
2496     if (get_bits1(gb)) {
2497         che_drc->prog_ref_level = get_bits(gb, 7);
2498         skip_bits1(gb); // prog_ref_level_reserved_bits
2499         n++;
2500     }
2501 
2502     for (i = 0; i < drc_num_bands; i++) {
2503         che_drc->dyn_rng_sgn[i] = get_bits1(gb);
2504         che_drc->dyn_rng_ctl[i] = get_bits(gb, 7);
2505         n++;
2506     }
2507 
2508     return n;
2509 }
2510 
2511 static int decode_fill(AACContext *ac, GetBitContext *gb, int len) {
2512     uint8_t buf[256];
2513     int i, major, minor;
2514 
2515     if (len < 13+7*8)
2516         goto unknown;
2517 
2518     get_bits(gb, 13); len -= 13;
2519 
2520     for(i=0; i+1<sizeof(buf) && len>=8; i++, len-=8)
2521         buf[i] = get_bits(gb, 8);
2522 
2523     buf[i] = 0;
2524     if (ac->avctx->debug & FF_DEBUG_PICT_INFO)
2525         av_log(ac->avctx, AV_LOG_DEBUG, "FILL:%s\n", buf);
2526 
2527     if (sscanf(buf, "libfaac %d.%d", &major, &minor) == 2){
2528         ac->avctx->internal->skip_samples = 1024;
2529     }
2530 
2531 unknown:
2532     skip_bits_long(gb, len);
2533 
2534     return 0;
2535 }
2536 
2537 /**
2538  * Decode extension data (incomplete); reference: table 4.51.
2539  *
2540  * @param   cnt length of TYPE_FIL syntactic element in bytes
2541  *
2542  * @return Returns number of bytes consumed
2543  */
2544 static int decode_extension_payload(AACContext *ac, GetBitContext *gb, int cnt,
2545                                     ChannelElement *che, enum RawDataBlockType elem_type)
2546 {
2547     int crc_flag = 0;
2548     int res = cnt;
2549     int type = get_bits(gb, 4);
2550 
2551     if (ac->avctx->debug & FF_DEBUG_STARTCODE)
2552         av_log(ac->avctx, AV_LOG_DEBUG, "extension type: %d len:%d\n", type, cnt);
2553 
2554     switch (type) { // extension type
2555     case EXT_SBR_DATA_CRC:
2556         crc_flag++;
2557     case EXT_SBR_DATA:
2558         if (!che) {
2559             av_log(ac->avctx, AV_LOG_ERROR, "SBR was found before the first channel element.\n");
2560             return res;
2561         } else if (ac->oc[1].m4ac.frame_length_short) {
2562             if (!ac->warned_960_sbr)
2563               avpriv_report_missing_feature(ac->avctx,
2564                                             "SBR with 960 frame length");
2565             ac->warned_960_sbr = 1;
2566             skip_bits_long(gb, 8 * cnt - 4);
2567             return res;
2568         } else if (!ac->oc[1].m4ac.sbr) {
2569             av_log(ac->avctx, AV_LOG_ERROR, "SBR signaled to be not-present but was found in the bitstream.\n");
2570             skip_bits_long(gb, 8 * cnt - 4);
2571             return res;
2572         } else if (ac->oc[1].m4ac.sbr == -1 && ac->oc[1].status == OC_LOCKED) {
2573             av_log(ac->avctx, AV_LOG_ERROR, "Implicit SBR was found with a first occurrence after the first frame.\n");
2574             skip_bits_long(gb, 8 * cnt - 4);
2575             return res;
2576         } else if (ac->oc[1].m4ac.ps == -1 && ac->oc[1].status < OC_LOCKED &&
2577                    ac->avctx->ch_layout.nb_channels == 1) {
2578             ac->oc[1].m4ac.sbr = 1;
2579             ac->oc[1].m4ac.ps = 1;
2580             ac->avctx->profile = FF_PROFILE_AAC_HE_V2;
2581             output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags,
2582                              ac->oc[1].status, 1);
2583         } else {
2584             ac->oc[1].m4ac.sbr = 1;
2585             ac->avctx->profile = FF_PROFILE_AAC_HE;
2586         }
2587         res = AAC_RENAME(ff_decode_sbr_extension)(ac, &che->sbr, gb, crc_flag, cnt, elem_type);
2588         break;
2589     case EXT_DYNAMIC_RANGE:
2590         res = decode_dynamic_range(&ac->che_drc, gb);
2591         break;
2592     case EXT_FILL:
2593         decode_fill(ac, gb, 8 * cnt - 4);
2594         break;
2595     case EXT_FILL_DATA:
2596     case EXT_DATA_ELEMENT:
2597     default:
2598         skip_bits_long(gb, 8 * cnt - 4);
2599         break;
2600     };
2601     return res;
2602 }
2603 
2604 /**
2605  * Decode Temporal Noise Shaping filter coefficients and apply all-pole filters; reference: 4.6.9.3.
2606  *
2607  * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
2608  * @param   coef    spectral coefficients
2609  */
2610 static void apply_tns(INTFLOAT coef_param[1024], TemporalNoiseShaping *tns,
2611                       IndividualChannelStream *ics, int decode)
2612 {
2613     const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
2614     int w, filt, m, i;
2615     int bottom, top, order, start, end, size, inc;
2616     INTFLOAT lpc[TNS_MAX_ORDER];
2617     INTFLOAT tmp[TNS_MAX_ORDER+1];
2618     UINTFLOAT *coef = coef_param;
2619 
2620     if(!mmm)
2621         return;
2622 
2623     for (w = 0; w < ics->num_windows; w++) {
2624         bottom = ics->num_swb;
2625         for (filt = 0; filt < tns->n_filt[w]; filt++) {
2626             top    = bottom;
2627             bottom = FFMAX(0, top - tns->length[w][filt]);
2628             order  = tns->order[w][filt];
2629             if (order == 0)
2630                 continue;
2631 
2632             // tns_decode_coef
2633             AAC_RENAME(compute_lpc_coefs)(tns->coef[w][filt], order, lpc, 0, 0, 0);
2634 
2635             start = ics->swb_offset[FFMIN(bottom, mmm)];
2636             end   = ics->swb_offset[FFMIN(   top, mmm)];
2637             if ((size = end - start) <= 0)
2638                 continue;
2639             if (tns->direction[w][filt]) {
2640                 inc = -1;
2641                 start = end - 1;
2642             } else {
2643                 inc = 1;
2644             }
2645             start += w * 128;
2646 
2647             if (decode) {
2648                 // ar filter
2649                 for (m = 0; m < size; m++, start += inc)
2650                     for (i = 1; i <= FFMIN(m, order); i++)
2651                         coef[start] -= AAC_MUL26((INTFLOAT)coef[start - i * inc], lpc[i - 1]);
2652             } else {
2653                 // ma filter
2654                 for (m = 0; m < size; m++, start += inc) {
2655                     tmp[0] = coef[start];
2656                     for (i = 1; i <= FFMIN(m, order); i++)
2657                         coef[start] += AAC_MUL26(tmp[i], lpc[i - 1]);
2658                     for (i = order; i > 0; i--)
2659                         tmp[i] = tmp[i - 1];
2660                 }
2661             }
2662         }
2663     }
2664 }
2665 
2666 /**
2667  *  Apply windowing and MDCT to obtain the spectral
2668  *  coefficient from the predicted sample by LTP.
2669  */
2670 static void windowing_and_mdct_ltp(AACContext *ac, INTFLOAT *out,
2671                                    INTFLOAT *in, IndividualChannelStream *ics)
2672 {
2673     const INTFLOAT *lwindow      = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
2674     const INTFLOAT *swindow      = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
2675     const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
2676     const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
2677 
2678     if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
2679         ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024);
2680     } else {
2681         memset(in, 0, 448 * sizeof(*in));
2682         ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128);
2683     }
2684     if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
2685         ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
2686     } else {
2687         ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
2688         memset(in + 1024 + 576, 0, 448 * sizeof(*in));
2689     }
2690     ac->mdct_ltp.mdct_calc(&ac->mdct_ltp, out, in);
2691 }
2692 
2693 /**
2694  * Apply the long term prediction
2695  */
2696 static void apply_ltp(AACContext *ac, SingleChannelElement *sce)
2697 {
2698     const LongTermPrediction *ltp = &sce->ics.ltp;
2699     const uint16_t *offsets = sce->ics.swb_offset;
2700     int i, sfb;
2701 
2702     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
2703         INTFLOAT *predTime = sce->ret;
2704         INTFLOAT *predFreq = ac->buf_mdct;
2705         int16_t num_samples = 2048;
2706 
2707         if (ltp->lag < 1024)
2708             num_samples = ltp->lag + 1024;
2709         for (i = 0; i < num_samples; i++)
2710             predTime[i] = AAC_MUL30(sce->ltp_state[i + 2048 - ltp->lag], ltp->coef);
2711         memset(&predTime[i], 0, (2048 - i) * sizeof(*predTime));
2712 
2713         ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);
2714 
2715         if (sce->tns.present)
2716             ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);
2717 
2718         for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
2719             if (ltp->used[sfb])
2720                 for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
2721                     sce->coeffs[i] += (UINTFLOAT)predFreq[i];
2722     }
2723 }
2724 
2725 /**
2726  * Update the LTP buffer for next frame
2727  */
2728 static void update_ltp(AACContext *ac, SingleChannelElement *sce)
2729 {
2730     IndividualChannelStream *ics = &sce->ics;
2731     INTFLOAT *saved     = sce->saved;
2732     INTFLOAT *saved_ltp = sce->coeffs;
2733     const INTFLOAT *lwindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
2734     const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
2735     int i;
2736 
2737     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2738         memcpy(saved_ltp,       saved, 512 * sizeof(*saved_ltp));
2739         memset(saved_ltp + 576, 0,     448 * sizeof(*saved_ltp));
2740         ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
2741 
2742         for (i = 0; i < 64; i++)
2743             saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]);
2744     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2745         memcpy(saved_ltp,       ac->buf_mdct + 512, 448 * sizeof(*saved_ltp));
2746         memset(saved_ltp + 576, 0,                  448 * sizeof(*saved_ltp));
2747         ac->fdsp->vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
2748 
2749         for (i = 0; i < 64; i++)
2750             saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], swindow[63 - i]);
2751     } else { // LONG_STOP or ONLY_LONG
2752         ac->fdsp->vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
2753 
2754         for (i = 0; i < 512; i++)
2755             saved_ltp[i + 512] = AAC_MUL31(ac->buf_mdct[1023 - i], lwindow[511 - i]);
2756     }
2757 
2758     memcpy(sce->ltp_state,      sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
2759     memcpy(sce->ltp_state+1024, sce->ret,            1024 * sizeof(*sce->ltp_state));
2760     memcpy(sce->ltp_state+2048, saved_ltp,           1024 * sizeof(*sce->ltp_state));
2761 }
2762 
2763 /**
2764  * Conduct IMDCT and windowing.
2765  */
2766 static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
2767 {
2768     IndividualChannelStream *ics = &sce->ics;
2769     INTFLOAT *in    = sce->coeffs;
2770     INTFLOAT *out   = sce->ret;
2771     INTFLOAT *saved = sce->saved;
2772     const INTFLOAT *swindow      = ics->use_kb_window[0] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
2773     const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
2774     const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
2775     INTFLOAT *buf  = ac->buf_mdct;
2776     INTFLOAT *temp = ac->temp;
2777     int i;
2778 
2779     // imdct
2780     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2781         for (i = 0; i < 1024; i += 128)
2782             ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
2783     } else {
2784         ac->mdct.imdct_half(&ac->mdct, buf, in);
2785 #if USE_FIXED
2786         for (i=0; i<1024; i++)
2787           buf[i] = (buf[i] + 4LL) >> 3;
2788 #endif /* USE_FIXED */
2789     }
2790 
2791     /* window overlapping
2792      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2793      * and long to short transitions are considered to be short to short
2794      * transitions. This leaves just two cases (long to long and short to short)
2795      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2796      */
2797     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
2798             (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
2799         ac->fdsp->vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
2800     } else {
2801         memcpy(                         out,               saved,            448 * sizeof(*out));
2802 
2803         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2804             ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448,      buf + 0*128, swindow_prev, 64);
2805             ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow,      64);
2806             ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow,      64);
2807             ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow,      64);
2808             ac->fdsp->vector_fmul_window(temp,              buf + 3*128 + 64, buf + 4*128, swindow,      64);
2809             memcpy(                     out + 448 + 4*128, temp, 64 * sizeof(*out));
2810         } else {
2811             ac->fdsp->vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
2812             memcpy(                     out + 576,         buf + 64,         448 * sizeof(*out));
2813         }
2814     }
2815 
2816     // buffer update
2817     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2818         memcpy(                     saved,       temp + 64,         64 * sizeof(*saved));
2819         ac->fdsp->vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
2820         ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
2821         ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
2822         memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(*saved));
2823     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2824         memcpy(                     saved,       buf + 512,        448 * sizeof(*saved));
2825         memcpy(                     saved + 448, buf + 7*128 + 64,  64 * sizeof(*saved));
2826     } else { // LONG_STOP or ONLY_LONG
2827         memcpy(                     saved,       buf + 512,        512 * sizeof(*saved));
2828     }
2829 }
2830 
2831 /**
2832  * Conduct IMDCT and windowing.
2833  */
2834 static void imdct_and_windowing_960(AACContext *ac, SingleChannelElement *sce)
2835 {
2836 #if !USE_FIXED
2837     IndividualChannelStream *ics = &sce->ics;
2838     INTFLOAT *in    = sce->coeffs;
2839     INTFLOAT *out   = sce->ret;
2840     INTFLOAT *saved = sce->saved;
2841     const INTFLOAT *swindow      = ics->use_kb_window[0] ? AAC_RENAME(aac_kbd_short_120) : AAC_RENAME(sine_120);
2842     const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(aac_kbd_long_960) : AAC_RENAME(sine_960);
2843     const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(aac_kbd_short_120) : AAC_RENAME(sine_120);
2844     INTFLOAT *buf  = ac->buf_mdct;
2845     INTFLOAT *temp = ac->temp;
2846     int i;
2847 
2848     // imdct
2849     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2850         for (i = 0; i < 8; i++)
2851             ac->mdct120->imdct_half(ac->mdct120, buf + i * 120, in + i * 128, 1);
2852     } else {
2853         ac->mdct960->imdct_half(ac->mdct960, buf, in, 1);
2854     }
2855 
2856     /* window overlapping
2857      * NOTE: To simplify the overlapping code, all 'meaningless' short to long
2858      * and long to short transitions are considered to be short to short
2859      * transitions. This leaves just two cases (long to long and short to short)
2860      * with a little special sauce for EIGHT_SHORT_SEQUENCE.
2861      */
2862 
2863     if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
2864         (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
2865         ac->fdsp->vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 480);
2866     } else {
2867         memcpy(                          out,               saved,            420 * sizeof(*out));
2868 
2869         if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2870             ac->fdsp->vector_fmul_window(out + 420 + 0*120, saved + 420,      buf + 0*120, swindow_prev, 60);
2871             ac->fdsp->vector_fmul_window(out + 420 + 1*120, buf + 0*120 + 60, buf + 1*120, swindow,      60);
2872             ac->fdsp->vector_fmul_window(out + 420 + 2*120, buf + 1*120 + 60, buf + 2*120, swindow,      60);
2873             ac->fdsp->vector_fmul_window(out + 420 + 3*120, buf + 2*120 + 60, buf + 3*120, swindow,      60);
2874             ac->fdsp->vector_fmul_window(temp,              buf + 3*120 + 60, buf + 4*120, swindow,      60);
2875             memcpy(                      out + 420 + 4*120, temp, 60 * sizeof(*out));
2876         } else {
2877             ac->fdsp->vector_fmul_window(out + 420,         saved + 420,      buf,         swindow_prev, 60);
2878             memcpy(                      out + 540,         buf + 60,         420 * sizeof(*out));
2879         }
2880     }
2881 
2882     // buffer update
2883     if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
2884         memcpy(                      saved,       temp + 60,         60 * sizeof(*saved));
2885         ac->fdsp->vector_fmul_window(saved + 60,  buf + 4*120 + 60, buf + 5*120, swindow, 60);
2886         ac->fdsp->vector_fmul_window(saved + 180, buf + 5*120 + 60, buf + 6*120, swindow, 60);
2887         ac->fdsp->vector_fmul_window(saved + 300, buf + 6*120 + 60, buf + 7*120, swindow, 60);
2888         memcpy(                      saved + 420, buf + 7*120 + 60,  60 * sizeof(*saved));
2889     } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
2890         memcpy(                      saved,       buf + 480,        420 * sizeof(*saved));
2891         memcpy(                      saved + 420, buf + 7*120 + 60,  60 * sizeof(*saved));
2892     } else { // LONG_STOP or ONLY_LONG
2893         memcpy(                      saved,       buf + 480,        480 * sizeof(*saved));
2894     }
2895 #endif
2896 }
2897 static void imdct_and_windowing_ld(AACContext *ac, SingleChannelElement *sce)
2898 {
2899     IndividualChannelStream *ics = &sce->ics;
2900     INTFLOAT *in    = sce->coeffs;
2901     INTFLOAT *out   = sce->ret;
2902     INTFLOAT *saved = sce->saved;
2903     INTFLOAT *buf  = ac->buf_mdct;
2904 #if USE_FIXED
2905     int i;
2906 #endif /* USE_FIXED */
2907 
2908     // imdct
2909     ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
2910 
2911 #if USE_FIXED
2912     for (i = 0; i < 1024; i++)
2913         buf[i] = (buf[i] + 2) >> 2;
2914 #endif /* USE_FIXED */
2915 
2916     // window overlapping
2917     if (ics->use_kb_window[1]) {
2918         // AAC LD uses a low overlap sine window instead of a KBD window
2919         memcpy(out, saved, 192 * sizeof(*out));
2920         ac->fdsp->vector_fmul_window(out + 192, saved + 192, buf, AAC_RENAME2(sine_128), 64);
2921         memcpy(                     out + 320, buf + 64, 192 * sizeof(*out));
2922     } else {
2923         ac->fdsp->vector_fmul_window(out, saved, buf, AAC_RENAME2(sine_512), 256);
2924     }
2925 
2926     // buffer update
2927     memcpy(saved, buf + 256, 256 * sizeof(*saved));
2928 }
2929 
2930 static void imdct_and_windowing_eld(AACContext *ac, SingleChannelElement *sce)
2931 {
2932     UINTFLOAT *in   = sce->coeffs;
2933     INTFLOAT *out   = sce->ret;
2934     INTFLOAT *saved = sce->saved;
2935     INTFLOAT *buf  = ac->buf_mdct;
2936     int i;
2937     const int n  = ac->oc[1].m4ac.frame_length_short ? 480 : 512;
2938     const int n2 = n >> 1;
2939     const int n4 = n >> 2;
2940     const INTFLOAT *const window = n == 480 ? AAC_RENAME(ff_aac_eld_window_480) :
2941                                            AAC_RENAME(ff_aac_eld_window_512);
2942 
2943     // Inverse transform, mapped to the conventional IMDCT by
2944     // Chivukula, R.K.; Reznik, Y.A.; Devarajan, V.,
2945     // "Efficient algorithms for MPEG-4 AAC-ELD, AAC-LD and AAC-LC filterbanks,"
2946     // International Conference on Audio, Language and Image Processing, ICALIP 2008.
2947     // URL: http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4590245&isnumber=4589950
2948     for (i = 0; i < n2; i+=2) {
2949         INTFLOAT temp;
2950         temp =  in[i    ]; in[i    ] = -in[n - 1 - i]; in[n - 1 - i] = temp;
2951         temp = -in[i + 1]; in[i + 1] =  in[n - 2 - i]; in[n - 2 - i] = temp;
2952     }
2953 #if !USE_FIXED
2954     if (n == 480)
2955         ac->mdct480->imdct_half(ac->mdct480, buf, in, 1);
2956     else
2957 #endif
2958         ac->mdct.imdct_half(&ac->mdct_ld, buf, in);
2959 
2960 #if USE_FIXED
2961     for (i = 0; i < 1024; i++)
2962       buf[i] = (buf[i] + 1) >> 1;
2963 #endif /* USE_FIXED */
2964 
2965     for (i = 0; i < n; i+=2) {
2966         buf[i] = -buf[i];
2967     }
2968     // Like with the regular IMDCT at this point we still have the middle half
2969     // of a transform but with even symmetry on the left and odd symmetry on
2970     // the right
2971 
2972     // window overlapping
2973     // The spec says to use samples [0..511] but the reference decoder uses
2974     // samples [128..639].
2975     for (i = n4; i < n2; i ++) {
2976         out[i - n4] = AAC_MUL31(   buf[    n2 - 1 - i] , window[i       - n4]) +
2977                       AAC_MUL31( saved[        i + n2] , window[i +   n - n4]) +
2978                       AAC_MUL31(-saved[n + n2 - 1 - i] , window[i + 2*n - n4]) +
2979                       AAC_MUL31(-saved[  2*n + n2 + i] , window[i + 3*n - n4]);
2980     }
2981     for (i = 0; i < n2; i ++) {
2982         out[n4 + i] = AAC_MUL31(   buf[              i] , window[i + n2       - n4]) +
2983                       AAC_MUL31(-saved[      n - 1 - i] , window[i + n2 +   n - n4]) +
2984                       AAC_MUL31(-saved[          n + i] , window[i + n2 + 2*n - n4]) +
2985                       AAC_MUL31( saved[2*n + n - 1 - i] , window[i + n2 + 3*n - n4]);
2986     }
2987     for (i = 0; i < n4; i ++) {
2988         out[n2 + n4 + i] = AAC_MUL31(   buf[    i + n2] , window[i +   n - n4]) +
2989                            AAC_MUL31(-saved[n2 - 1 - i] , window[i + 2*n - n4]) +
2990                            AAC_MUL31(-saved[n + n2 + i] , window[i + 3*n - n4]);
2991     }
2992 
2993     // buffer update
2994     memmove(saved + n, saved, 2 * n * sizeof(*saved));
2995     memcpy( saved,       buf,     n * sizeof(*saved));
2996 }
2997 
2998 /**
2999  * channel coupling transformation interface
3000  *
3001  * @param   apply_coupling_method   pointer to (in)dependent coupling function
3002  */
3003 static void apply_channel_coupling(AACContext *ac, ChannelElement *cc,
3004                                    enum RawDataBlockType type, int elem_id,
3005                                    enum CouplingPoint coupling_point,
3006                                    void (*apply_coupling_method)(AACContext *ac, SingleChannelElement *target, ChannelElement *cce, int index))
3007 {
3008     int i, c;
3009 
3010     for (i = 0; i < MAX_ELEM_ID; i++) {
3011         ChannelElement *cce = ac->che[TYPE_CCE][i];
3012         int index = 0;
3013 
3014         if (cce && cce->coup.coupling_point == coupling_point) {
3015             ChannelCoupling *coup = &cce->coup;
3016 
3017             for (c = 0; c <= coup->num_coupled; c++) {
3018                 if (coup->type[c] == type && coup->id_select[c] == elem_id) {
3019                     if (coup->ch_select[c] != 1) {
3020                         apply_coupling_method(ac, &cc->ch[0], cce, index);
3021                         if (coup->ch_select[c] != 0)
3022                             index++;
3023                     }
3024                     if (coup->ch_select[c] != 2)
3025                         apply_coupling_method(ac, &cc->ch[1], cce, index++);
3026                 } else
3027                     index += 1 + (coup->ch_select[c] == 3);
3028             }
3029         }
3030     }
3031 }
3032 
3033 /**
3034  * Convert spectral data to samples, applying all supported tools as appropriate.
3035  */
3036 static void spectral_to_sample(AACContext *ac, int samples)
3037 {
3038     int i, type;
3039     void (*imdct_and_window)(AACContext *ac, SingleChannelElement *sce);
3040     switch (ac->oc[1].m4ac.object_type) {
3041     case AOT_ER_AAC_LD:
3042         imdct_and_window = imdct_and_windowing_ld;
3043         break;
3044     case AOT_ER_AAC_ELD:
3045         imdct_and_window = imdct_and_windowing_eld;
3046         break;
3047     default:
3048         if (ac->oc[1].m4ac.frame_length_short)
3049             imdct_and_window = imdct_and_windowing_960;
3050         else
3051             imdct_and_window = ac->imdct_and_windowing;
3052     }
3053     for (type = 3; type >= 0; type--) {
3054         for (i = 0; i < MAX_ELEM_ID; i++) {
3055             ChannelElement *che = ac->che[type][i];
3056             if (che && che->present) {
3057                 if (type <= TYPE_CPE)
3058                     apply_channel_coupling(ac, che, type, i, BEFORE_TNS, AAC_RENAME(apply_dependent_coupling));
3059                 if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
3060                     if (che->ch[0].ics.predictor_present) {
3061                         if (che->ch[0].ics.ltp.present)
3062                             ac->apply_ltp(ac, &che->ch[0]);
3063                         if (che->ch[1].ics.ltp.present && type == TYPE_CPE)
3064                             ac->apply_ltp(ac, &che->ch[1]);
3065                     }
3066                 }
3067                 if (che->ch[0].tns.present)
3068                     ac->apply_tns(che->ch[0].coeffs, &che->ch[0].tns, &che->ch[0].ics, 1);
3069                 if (che->ch[1].tns.present)
3070                     ac->apply_tns(che->ch[1].coeffs, &che->ch[1].tns, &che->ch[1].ics, 1);
3071                 if (type <= TYPE_CPE)
3072                     apply_channel_coupling(ac, che, type, i, BETWEEN_TNS_AND_IMDCT, AAC_RENAME(apply_dependent_coupling));
3073                 if (type != TYPE_CCE || che->coup.coupling_point == AFTER_IMDCT) {
3074                     imdct_and_window(ac, &che->ch[0]);
3075                     if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
3076                         ac->update_ltp(ac, &che->ch[0]);
3077                     if (type == TYPE_CPE) {
3078                         imdct_and_window(ac, &che->ch[1]);
3079                         if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP)
3080                             ac->update_ltp(ac, &che->ch[1]);
3081                     }
3082                     if (ac->oc[1].m4ac.sbr > 0) {
3083                         AAC_RENAME(ff_sbr_apply)(ac, &che->sbr, type, che->ch[0].ret, che->ch[1].ret);
3084                     }
3085                 }
3086                 if (type <= TYPE_CCE)
3087                     apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, AAC_RENAME(apply_independent_coupling));
3088 
3089 #if USE_FIXED
3090                 {
3091                     int j;
3092                     /* preparation for resampler */
3093                     for(j = 0; j<samples; j++){
3094                         che->ch[0].ret[j] = (int32_t)av_clip64((int64_t)che->ch[0].ret[j]*128, INT32_MIN, INT32_MAX-0x8000)+0x8000;
3095                         if(type == TYPE_CPE)
3096                             che->ch[1].ret[j] = (int32_t)av_clip64((int64_t)che->ch[1].ret[j]*128, INT32_MIN, INT32_MAX-0x8000)+0x8000;
3097                     }
3098                 }
3099 #endif /* USE_FIXED */
3100                 che->present = 0;
3101             } else if (che) {
3102                 av_log(ac->avctx, AV_LOG_VERBOSE, "ChannelElement %d.%d missing \n", type, i);
3103             }
3104         }
3105     }
3106 }
3107 
3108 static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb)
3109 {
3110     int size;
3111     AACADTSHeaderInfo hdr_info;
3112     uint8_t layout_map[MAX_ELEM_ID*4][3];
3113     int layout_map_tags, ret;
3114 
3115     size = ff_adts_header_parse(gb, &hdr_info);
3116     if (size > 0) {
3117         if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) {
3118             // This is 2 for "VLB " audio in NSV files.
3119             // See samples/nsv/vlb_audio.
3120             avpriv_report_missing_feature(ac->avctx,
3121                                           "More than one AAC RDB per ADTS frame");
3122             ac->warned_num_aac_frames = 1;
3123         }
3124         push_output_configuration(ac);
3125         if (hdr_info.chan_config) {
3126             ac->oc[1].m4ac.chan_config = hdr_info.chan_config;
3127             if ((ret = set_default_channel_config(ac, ac->avctx,
3128                                                   layout_map,
3129                                                   &layout_map_tags,
3130                                                   hdr_info.chan_config)) < 0)
3131                 return ret;
3132             if ((ret = output_configure(ac, layout_map, layout_map_tags,
3133                                         FFMAX(ac->oc[1].status,
3134                                               OC_TRIAL_FRAME), 0)) < 0)
3135                 return ret;
3136         } else {
3137             ac->oc[1].m4ac.chan_config = 0;
3138             /**
3139              * dual mono frames in Japanese DTV can have chan_config 0
3140              * WITHOUT specifying PCE.
3141              *  thus, set dual mono as default.
3142              */
3143             if (ac->dmono_mode && ac->oc[0].status == OC_NONE) {
3144                 layout_map_tags = 2;
3145                 layout_map[0][0] = layout_map[1][0] = TYPE_SCE;
3146                 layout_map[0][2] = layout_map[1][2] = AAC_CHANNEL_FRONT;
3147                 layout_map[0][1] = 0;
3148                 layout_map[1][1] = 1;
3149                 if (output_configure(ac, layout_map, layout_map_tags,
3150                                      OC_TRIAL_FRAME, 0))
3151                     return -7;
3152             }
3153         }
3154         ac->oc[1].m4ac.sample_rate     = hdr_info.sample_rate;
3155         ac->oc[1].m4ac.sampling_index  = hdr_info.sampling_index;
3156         ac->oc[1].m4ac.object_type     = hdr_info.object_type;
3157         ac->oc[1].m4ac.frame_length_short = 0;
3158         if (ac->oc[0].status != OC_LOCKED ||
3159             ac->oc[0].m4ac.chan_config != hdr_info.chan_config ||
3160             ac->oc[0].m4ac.sample_rate != hdr_info.sample_rate) {
3161             ac->oc[1].m4ac.sbr = -1;
3162             ac->oc[1].m4ac.ps  = -1;
3163         }
3164         if (!hdr_info.crc_absent)
3165             skip_bits(gb, 16);
3166     }
3167     return size;
3168 }
3169 
3170 static int aac_decode_er_frame(AVCodecContext *avctx, void *data,
3171                                int *got_frame_ptr, GetBitContext *gb)
3172 {
3173     AACContext *ac = avctx->priv_data;
3174     const MPEG4AudioConfig *const m4ac = &ac->oc[1].m4ac;
3175     ChannelElement *che;
3176     int err, i;
3177     int samples = m4ac->frame_length_short ? 960 : 1024;
3178     int chan_config = m4ac->chan_config;
3179     int aot = m4ac->object_type;
3180 
3181     if (aot == AOT_ER_AAC_LD || aot == AOT_ER_AAC_ELD)
3182         samples >>= 1;
3183 
3184     ac->frame = data;
3185 
3186     if ((err = frame_configure_elements(avctx)) < 0)
3187         return err;
3188 
3189     // The FF_PROFILE_AAC_* defines are all object_type - 1
3190     // This may lead to an undefined profile being signaled
3191     ac->avctx->profile = aot - 1;
3192 
3193     ac->tags_mapped = 0;
3194 
3195     if (chan_config < 0 || (chan_config >= 8 && chan_config < 11) || chan_config >= 13) {
3196         avpriv_request_sample(avctx, "Unknown ER channel configuration %d",
3197                               chan_config);
3198         return AVERROR_INVALIDDATA;
3199     }
3200     for (i = 0; i < tags_per_config[chan_config]; i++) {
3201         const int elem_type = aac_channel_layout_map[chan_config-1][i][0];
3202         const int elem_id   = aac_channel_layout_map[chan_config-1][i][1];
3203         if (!(che=get_che(ac, elem_type, elem_id))) {
3204             av_log(ac->avctx, AV_LOG_ERROR,
3205                    "channel element %d.%d is not allocated\n",
3206                    elem_type, elem_id);
3207             return AVERROR_INVALIDDATA;
3208         }
3209         che->present = 1;
3210         if (aot != AOT_ER_AAC_ELD)
3211             skip_bits(gb, 4);
3212         switch (elem_type) {
3213         case TYPE_SCE:
3214             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
3215             break;
3216         case TYPE_CPE:
3217             err = decode_cpe(ac, gb, che);
3218             break;
3219         case TYPE_LFE:
3220             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
3221             break;
3222         }
3223         if (err < 0)
3224             return err;
3225     }
3226 
3227     spectral_to_sample(ac, samples);
3228 
3229     if (!ac->frame->data[0] && samples) {
3230         av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
3231         return AVERROR_INVALIDDATA;
3232     }
3233 
3234     ac->frame->nb_samples = samples;
3235     ac->frame->sample_rate = avctx->sample_rate;
3236     *got_frame_ptr = 1;
3237 
3238     skip_bits_long(gb, get_bits_left(gb));
3239     return 0;
3240 }
3241 
3242 static int aac_decode_frame_int(AVCodecContext *avctx, AVFrame *frame,
3243                                 int *got_frame_ptr, GetBitContext *gb,
3244                                 const AVPacket *avpkt)
3245 {
3246     AACContext *ac = avctx->priv_data;
3247     ChannelElement *che = NULL, *che_prev = NULL;
3248     enum RawDataBlockType elem_type, che_prev_type = TYPE_END;
3249     int err, elem_id;
3250     int samples = 0, multiplier, audio_found = 0, pce_found = 0;
3251     int is_dmono, sce_count = 0;
3252     int payload_alignment;
3253     uint8_t che_presence[4][MAX_ELEM_ID] = {{0}};
3254 
3255     ac->frame = frame;
3256 
3257     if (show_bits(gb, 12) == 0xfff) {
3258         if ((err = parse_adts_frame_header(ac, gb)) < 0) {
3259             av_log(avctx, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
3260             goto fail;
3261         }
3262         if (ac->oc[1].m4ac.sampling_index > 12) {
3263             av_log(ac->avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->oc[1].m4ac.sampling_index);
3264             err = AVERROR_INVALIDDATA;
3265             goto fail;
3266         }
3267     }
3268 
3269     if ((err = frame_configure_elements(avctx)) < 0)
3270         goto fail;
3271 
3272     // The FF_PROFILE_AAC_* defines are all object_type - 1
3273     // This may lead to an undefined profile being signaled
3274     ac->avctx->profile = ac->oc[1].m4ac.object_type - 1;
3275 
3276     payload_alignment = get_bits_count(gb);
3277     ac->tags_mapped = 0;
3278     // parse
3279     while ((elem_type = get_bits(gb, 3)) != TYPE_END) {
3280         elem_id = get_bits(gb, 4);
3281 
3282         if (avctx->debug & FF_DEBUG_STARTCODE)
3283             av_log(avctx, AV_LOG_DEBUG, "Elem type:%x id:%x\n", elem_type, elem_id);
3284 
3285         if (!avctx->ch_layout.nb_channels && elem_type != TYPE_PCE) {
3286             err = AVERROR_INVALIDDATA;
3287             goto fail;
3288         }
3289 
3290         if (elem_type < TYPE_DSE) {
3291             if (che_presence[elem_type][elem_id]) {
3292                 int error = che_presence[elem_type][elem_id] > 1;
3293                 av_log(ac->avctx, error ? AV_LOG_ERROR : AV_LOG_DEBUG, "channel element %d.%d duplicate\n",
3294                        elem_type, elem_id);
3295                 if (error) {
3296                     err = AVERROR_INVALIDDATA;
3297                     goto fail;
3298                 }
3299             }
3300             che_presence[elem_type][elem_id]++;
3301 
3302             if (!(che=get_che(ac, elem_type, elem_id))) {
3303                 av_log(ac->avctx, AV_LOG_ERROR, "channel element %d.%d is not allocated\n",
3304                        elem_type, elem_id);
3305                 err = AVERROR_INVALIDDATA;
3306                 goto fail;
3307             }
3308             samples = ac->oc[1].m4ac.frame_length_short ? 960 : 1024;
3309             che->present = 1;
3310         }
3311 
3312         switch (elem_type) {
3313 
3314         case TYPE_SCE:
3315             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
3316             audio_found = 1;
3317             sce_count++;
3318             break;
3319 
3320         case TYPE_CPE:
3321             err = decode_cpe(ac, gb, che);
3322             audio_found = 1;
3323             break;
3324 
3325         case TYPE_CCE:
3326             err = decode_cce(ac, gb, che);
3327             break;
3328 
3329         case TYPE_LFE:
3330             err = decode_ics(ac, &che->ch[0], gb, 0, 0);
3331             audio_found = 1;
3332             break;
3333 
3334         case TYPE_DSE:
3335             err = skip_data_stream_element(ac, gb);
3336             break;
3337 
3338         case TYPE_PCE: {
3339             uint8_t layout_map[MAX_ELEM_ID*4][3] = {{0}};
3340             int tags;
3341 
3342             int pushed = push_output_configuration(ac);
3343             if (pce_found && !pushed) {
3344                 err = AVERROR_INVALIDDATA;
3345                 goto fail;
3346             }
3347 
3348             tags = decode_pce(avctx, &ac->oc[1].m4ac, layout_map, gb,
3349                               payload_alignment);
3350             if (tags < 0) {
3351                 err = tags;
3352                 break;
3353             }
3354             if (pce_found) {
3355                 av_log(avctx, AV_LOG_ERROR,
3356                        "Not evaluating a further program_config_element as this construct is dubious at best.\n");
3357                 pop_output_configuration(ac);
3358             } else {
3359                 err = output_configure(ac, layout_map, tags, OC_TRIAL_PCE, 1);
3360                 if (!err)
3361                     ac->oc[1].m4ac.chan_config = 0;
3362                 pce_found = 1;
3363             }
3364             break;
3365         }
3366 
3367         case TYPE_FIL:
3368             if (elem_id == 15)
3369                 elem_id += get_bits(gb, 8) - 1;
3370             if (get_bits_left(gb) < 8 * elem_id) {
3371                     av_log(avctx, AV_LOG_ERROR, "TYPE_FIL: "overread_err);
3372                     err = AVERROR_INVALIDDATA;
3373                     goto fail;
3374             }
3375             err = 0;
3376             while (elem_id > 0) {
3377                 int ret = decode_extension_payload(ac, gb, elem_id, che_prev, che_prev_type);
3378                 if (ret < 0) {
3379                     err = ret;
3380                     break;
3381                 }
3382                 elem_id -= ret;
3383             }
3384             break;
3385 
3386         default:
3387             err = AVERROR_BUG; /* should not happen, but keeps compiler happy */
3388             break;
3389         }
3390 
3391         if (elem_type < TYPE_DSE) {
3392             che_prev      = che;
3393             che_prev_type = elem_type;
3394         }
3395 
3396         if (err)
3397             goto fail;
3398 
3399         if (get_bits_left(gb) < 3) {
3400             av_log(avctx, AV_LOG_ERROR, overread_err);
3401             err = AVERROR_INVALIDDATA;
3402             goto fail;
3403         }
3404     }
3405 
3406     if (!avctx->ch_layout.nb_channels) {
3407         *got_frame_ptr = 0;
3408         return 0;
3409     }
3410 
3411     multiplier = (ac->oc[1].m4ac.sbr == 1) ? ac->oc[1].m4ac.ext_sample_rate > ac->oc[1].m4ac.sample_rate : 0;
3412     samples <<= multiplier;
3413 
3414     spectral_to_sample(ac, samples);
3415 
3416     if (ac->oc[1].status && audio_found) {
3417         avctx->sample_rate = ac->oc[1].m4ac.sample_rate << multiplier;
3418         avctx->frame_size = samples;
3419         ac->oc[1].status = OC_LOCKED;
3420     }
3421 
3422     if (multiplier)
3423         avctx->internal->skip_samples_multiplier = 2;
3424 
3425     if (!ac->frame->data[0] && samples) {
3426         av_log(avctx, AV_LOG_ERROR, "no frame data found\n");
3427         err = AVERROR_INVALIDDATA;
3428         goto fail;
3429     }
3430 
3431     if (samples) {
3432         ac->frame->nb_samples = samples;
3433         ac->frame->sample_rate = avctx->sample_rate;
3434     } else
3435         av_frame_unref(ac->frame);
3436     *got_frame_ptr = !!samples;
3437 
3438     /* for dual-mono audio (SCE + SCE) */
3439     is_dmono = ac->dmono_mode && sce_count == 2 &&
3440                !av_channel_layout_compare(&ac->oc[1].ch_layout,
3441                                           &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);
3442     if (is_dmono) {
3443         if (ac->dmono_mode == 1)
3444             frame->data[1] = frame->data[0];
3445         else if (ac->dmono_mode == 2)
3446             frame->data[0] = frame->data[1];
3447     }
3448 
3449     return 0;
3450 fail:
3451     pop_output_configuration(ac);
3452     return err;
3453 }
3454 
3455 static int aac_decode_frame(AVCodecContext *avctx, AVFrame *frame,
3456                             int *got_frame_ptr, AVPacket *avpkt)
3457 {
3458     AACContext *ac = avctx->priv_data;
3459     const uint8_t *buf = avpkt->data;
3460     int buf_size = avpkt->size;
3461     GetBitContext gb;
3462     int buf_consumed;
3463     int buf_offset;
3464     int err;
3465     size_t new_extradata_size;
3466     const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
3467                                        AV_PKT_DATA_NEW_EXTRADATA,
3468                                        &new_extradata_size);
3469     size_t jp_dualmono_size;
3470     const uint8_t *jp_dualmono   = av_packet_get_side_data(avpkt,
3471                                        AV_PKT_DATA_JP_DUALMONO,
3472                                        &jp_dualmono_size);
3473 
3474     if (new_extradata) {
3475         /* discard previous configuration */
3476         ac->oc[1].status = OC_NONE;
3477         err = decode_audio_specific_config(ac, ac->avctx, &ac->oc[1].m4ac,
3478                                            new_extradata,
3479                                            new_extradata_size * 8LL, 1);
3480         if (err < 0) {
3481             return err;
3482         }
3483     }
3484 
3485     ac->dmono_mode = 0;
3486     if (jp_dualmono && jp_dualmono_size > 0)
3487         ac->dmono_mode =  1 + *jp_dualmono;
3488     if (ac->force_dmono_mode >= 0)
3489         ac->dmono_mode = ac->force_dmono_mode;
3490 
3491     if (INT_MAX / 8 <= buf_size)
3492         return AVERROR_INVALIDDATA;
3493 
3494     if ((err = init_get_bits8(&gb, buf, buf_size)) < 0)
3495         return err;
3496 
3497     switch (ac->oc[1].m4ac.object_type) {
3498     case AOT_ER_AAC_LC:
3499     case AOT_ER_AAC_LTP:
3500     case AOT_ER_AAC_LD:
3501     case AOT_ER_AAC_ELD:
3502         err = aac_decode_er_frame(avctx, frame, got_frame_ptr, &gb);
3503         break;
3504     default:
3505         err = aac_decode_frame_int(avctx, frame, got_frame_ptr, &gb, avpkt);
3506     }
3507     if (err < 0)
3508         return err;
3509 
3510     buf_consumed = (get_bits_count(&gb) + 7) >> 3;
3511     for (buf_offset = buf_consumed; buf_offset < buf_size; buf_offset++)
3512         if (buf[buf_offset])
3513             break;
3514 
3515     return buf_size > buf_offset ? buf_consumed : buf_size;
3516 }
3517 
3518 static av_cold int aac_decode_close(AVCodecContext *avctx)
3519 {
3520     AACContext *ac = avctx->priv_data;
3521     int i, type;
3522 
3523     for (i = 0; i < MAX_ELEM_ID; i++) {
3524         for (type = 0; type < 4; type++) {
3525             if (ac->che[type][i])
3526                 AAC_RENAME(ff_aac_sbr_ctx_close)(&ac->che[type][i]->sbr);
3527             av_freep(&ac->che[type][i]);
3528         }
3529     }
3530 
3531     ff_mdct_end(&ac->mdct);
3532     ff_mdct_end(&ac->mdct_small);
3533     ff_mdct_end(&ac->mdct_ld);
3534     ff_mdct_end(&ac->mdct_ltp);
3535 #if !USE_FIXED
3536     ff_mdct15_uninit(&ac->mdct120);
3537     ff_mdct15_uninit(&ac->mdct480);
3538     ff_mdct15_uninit(&ac->mdct960);
3539 #endif
3540     av_freep(&ac->fdsp);
3541     return 0;
3542 }
3543 
3544 static void aacdec_init(AACContext *c)
3545 {
3546     c->imdct_and_windowing                      = imdct_and_windowing;
3547     c->apply_ltp                                = apply_ltp;
3548     c->apply_tns                                = apply_tns;
3549     c->windowing_and_mdct_ltp                   = windowing_and_mdct_ltp;
3550     c->update_ltp                               = update_ltp;
3551 #if USE_FIXED
3552     c->vector_pow43                             = vector_pow43;
3553     c->subband_scale                            = subband_scale;
3554 #endif
3555 
3556 #if !USE_FIXED
3557 #if ARCH_MIPS
3558     ff_aacdec_init_mips(c);
3559 #endif
3560 #endif /* !USE_FIXED */
3561 }
3562 /**
3563  * AVOptions for Japanese DTV specific extensions (ADTS only)
3564  */
3565 #define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
3566 static const AVOption options[] = {
3567     {"dual_mono_mode", "Select the channel to decode for dual mono",
3568      offsetof(AACContext, force_dmono_mode), AV_OPT_TYPE_INT, {.i64=-1}, -1, 2,
3569      AACDEC_FLAGS, "dual_mono_mode"},
3570 
3571     {"auto", "autoselection",            0, AV_OPT_TYPE_CONST, {.i64=-1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3572     {"main", "Select Main/Left channel", 0, AV_OPT_TYPE_CONST, {.i64= 1}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3573     {"sub" , "Select Sub/Right channel", 0, AV_OPT_TYPE_CONST, {.i64= 2}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3574     {"both", "Select both channels",     0, AV_OPT_TYPE_CONST, {.i64= 0}, INT_MIN, INT_MAX, AACDEC_FLAGS, "dual_mono_mode"},
3575 
3576     { "channel_order", "Order in which the channels are to be exported",
3577         offsetof(AACContext, output_channel_order), AV_OPT_TYPE_INT,
3578         { .i64 = CHANNEL_ORDER_DEFAULT }, 0, 1, AACDEC_FLAGS, "channel_order" },
3579       { "default", "normal libavcodec channel order", 0, AV_OPT_TYPE_CONST,
3580         { .i64 = CHANNEL_ORDER_DEFAULT }, .flags = AACDEC_FLAGS, "channel_order" },
3581       { "coded",    "order in which the channels are coded in the bitstream",
3582         0, AV_OPT_TYPE_CONST, { .i64 = CHANNEL_ORDER_CODED }, .flags = AACDEC_FLAGS, "channel_order" },
3583 
3584     {NULL},
3585 };
3586 
3587 static const AVClass aac_decoder_class = {
3588     .class_name = "AAC decoder",
3589     .item_name  = av_default_item_name,
3590     .option     = options,
3591     .version    = LIBAVUTIL_VERSION_INT,
3592 };
3593