• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Wmapro compatible decoder
3  * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
4  * Copyright (c) 2008 - 2011 Sascha Sommer, Benjamin Larsson
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * @brief wmapro decoder implementation
26  * Wmapro is an MDCT based codec comparable to wma standard or AAC.
27  * The decoding therefore consists of the following steps:
28  * - bitstream decoding
29  * - reconstruction of per-channel data
30  * - rescaling and inverse quantization
31  * - IMDCT
32  * - windowing and overlap-add
33  *
34  * The compressed wmapro bitstream is split into individual packets.
35  * Every such packet contains one or more wma frames.
36  * The compressed frames may have a variable length and frames may
37  * cross packet boundaries.
38  * Common to all wmapro frames is the number of samples that are stored in
39  * a frame.
40  * The number of samples and a few other decode flags are stored
41  * as extradata that has to be passed to the decoder.
42  *
43  * The wmapro frames themselves are again split into a variable number of
44  * subframes. Every subframe contains the data for 2^N time domain samples
45  * where N varies between 7 and 12.
46  *
47  * Example wmapro bitstream (in samples):
48  *
49  * ||   packet 0           || packet 1 || packet 2      packets
50  * ---------------------------------------------------
51  * || frame 0      || frame 1       || frame 2    ||    frames
52  * ---------------------------------------------------
53  * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
54  * ---------------------------------------------------
55  * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
56  * ---------------------------------------------------
57  *
58  * The frame layouts for the individual channels of a wma frame do not need
59  * to be the same.
60  *
61  * However, if the offsets and lengths of several subframes of a frame are the
62  * same, the subframes of the channels can be grouped.
63  * Every group may then use special coding techniques like M/S stereo coding
64  * to improve the compression ratio. These channel transformations do not
65  * need to be applied to a whole subframe. Instead, they can also work on
66  * individual scale factor bands (see below).
67  * The coefficients that carry the audio signal in the frequency domain
68  * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
69  * In addition to that, the encoder can switch to a runlevel coding scheme
70  * by transmitting subframe_length / 128 zero coefficients.
71  *
72  * Before the audio signal can be converted to the time domain, the
73  * coefficients have to be rescaled and inverse quantized.
74  * A subframe is therefore split into several scale factor bands that get
75  * scaled individually.
76  * Scale factors are submitted for every frame but they might be shared
77  * between the subframes of a channel. Scale factors are initially DPCM-coded.
78  * Once scale factors are shared, the differences are transmitted as runlevel
79  * codes.
80  * Every subframe length and offset combination in the frame layout shares a
81  * common quantization factor that can be adjusted for every channel by a
82  * modifier.
83  * After the inverse quantization, the coefficients get processed by an IMDCT.
84  * The resulting values are then windowed with a sine window and the first half
85  * of the values are added to the second half of the output from the previous
86  * subframe in order to reconstruct the output samples.
87  */
88 
89 #include <inttypes.h>
90 
91 #include "libavutil/audio_fifo.h"
92 #include "libavutil/ffmath.h"
93 #include "libavutil/float_dsp.h"
94 #include "libavutil/intfloat.h"
95 #include "libavutil/intreadwrite.h"
96 #include "libavutil/mem_internal.h"
97 #include "libavutil/thread.h"
98 
99 #include "avcodec.h"
100 #include "codec_internal.h"
101 #include "internal.h"
102 #include "get_bits.h"
103 #include "put_bits.h"
104 #include "wmaprodata.h"
105 #include "sinewin.h"
106 #include "wma.h"
107 #include "wma_common.h"
108 
109 /** current decoder limitations */
110 #define WMAPRO_MAX_CHANNELS    8                             ///< max number of handled channels
111 #define MAX_SUBFRAMES  32                                    ///< max number of subframes per channel
112 #define MAX_BANDS      29                                    ///< max number of scale factor bands
113 #define MAX_FRAMESIZE  32768                                 ///< maximum compressed frame size
114 #define XMA_MAX_STREAMS         8                            ///< max number of XMA streams (sizes the per-stream arrays of XMADecodeCtx)
115 #define XMA_MAX_CHANNELS_STREAM 2                            ///< max number of channels in one XMA stream (enforced in decode_init())
116 #define XMA_MAX_CHANNELS        (XMA_MAX_STREAMS * XMA_MAX_CHANNELS_STREAM) ///< max number of channels over all XMA streams
117 
118 #define WMAPRO_BLOCK_MIN_BITS  6                                           ///< log2 of min block size
119 #define WMAPRO_BLOCK_MAX_BITS 13                                           ///< log2 of max block size
120 #define WMAPRO_BLOCK_MIN_SIZE (1 << WMAPRO_BLOCK_MIN_BITS)                 ///< minimum block size
121 #define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS)                 ///< maximum block size
122 #define WMAPRO_BLOCK_SIZES    (WMAPRO_BLOCK_MAX_BITS - WMAPRO_BLOCK_MIN_BITS + 1) ///< possible block sizes
123 
124 
125 #define VLCBITS            9                                                ///< bits argument given to INIT_VLC_STATIC for every table except sf_vlc
126 #define SCALEVLCBITS       8                                                ///< bits argument given to INIT_VLC_STATIC for sf_vlc
127 #define VEC4MAXDEPTH    ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)             ///< ceil(HUFF_VEC4_MAXBITS / VLCBITS)
128 #define VEC2MAXDEPTH    ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)             ///< ceil(HUFF_VEC2_MAXBITS / VLCBITS)
129 #define VEC1MAXDEPTH    ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)             ///< ceil(HUFF_VEC1_MAXBITS / VLCBITS)
130 #define SCALEMAXDEPTH   ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS)  ///< ceil(HUFF_SCALE_MAXBITS / SCALEVLCBITS)
131 #define SCALERLMAXDEPTH ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)         ///< ceil(HUFF_SCALE_RL_MAXBITS / VLCBITS)
132 
133 static VLC              sf_vlc;           ///< scale factor DPCM vlc
134 static VLC              sf_rl_vlc;        ///< scale factor run length vlc
135 static VLC              vec4_vlc;         ///< 4 coefficients per symbol
136 static VLC              vec2_vlc;         ///< 2 coefficients per symbol
137 static VLC              vec1_vlc;         ///< 1 coefficient per symbol
138 static VLC              coef_vlc[2];      ///< coefficient run length vlc codes
139 static float            sin64[33];        ///< sine table for decorrelation
140 
141 /**
142  * @brief frame specific decoder context for a single channel
143  */
144 typedef struct WMAProChannelCtx {
145     int16_t  prev_block_len;                          ///< length of the previous block
146     uint8_t  transmit_coefs;                          ///< NOTE(review): presumably set when coefficients are coded for this channel - confirm in the subframe decoder
147     uint8_t  num_subframes;                           ///< number of subframes of this channel (reset to 0 at the start of decode_tilehdr())
148     uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
149     uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe positions in the current frame
150     uint8_t  cur_subframe;                            ///< current subframe number
151     uint16_t decoded_samples;                         ///< number of already processed samples
152     uint8_t  grouped;                                 ///< channel is part of a group
153     int      quant_step;                              ///< quantization step for the current subframe
154     int8_t   reuse_sf;                                ///< share scale factors between subframes
155     int8_t   scale_factor_step;                       ///< scaling step for the current subframe
156     int      max_scale_factor;                        ///< maximum scale factor for the current subframe
157     int      saved_scale_factors[2][MAX_BANDS];       ///< resampled and (previously) transmitted scale factor values
158     int8_t   scale_factor_idx;                        ///< index for the transmitted scale factor values (used for resampling)
159     int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
160     uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
161     float*   coeffs;                                  ///< pointer to the subframe decode buffer
162     uint16_t num_vec_coeffs;                          ///< number of vector coded coefficients
163     DECLARE_ALIGNED(32, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
164 } WMAProChannelCtx;
165 
166 /**
167  * @brief channel group for channel transformations
168  */
169 typedef struct WMAProChannelGrp {
170     uint8_t num_channels;                                     ///< number of channels in the group
171     int8_t  transform;                                        ///< transform on / off
172     int8_t  transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band
173     float   decorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS]; ///< room for a WMAPRO_MAX_CHANNELS x WMAPRO_MAX_CHANNELS matrix; NOTE(review): presumably the inter-channel transform - confirm
174     float*  channel_data[WMAPRO_MAX_CHANNELS];                ///< transformation coefficients
175 } WMAProChannelGrp;
176 
177 /**
178  * @brief main decoder context
179  */
180 typedef struct WMAProDecodeCtx {
181     /* generic decoder variables */
182     AVCodecContext*  avctx;                         ///< codec context for av_log
183     AVFloatDSPContext *fdsp;                        ///< float DSP context, allocated in decode_init() and freed in decode_end()
184     uint8_t          frame_data[MAX_FRAMESIZE +
185                       AV_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
186     PutBitContext    pb;                            ///< context for filling the frame_data buffer
187     FFTContext       mdct_ctx[WMAPRO_BLOCK_SIZES];  ///< MDCT context per block size
188     DECLARE_ALIGNED(32, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
189     const float*     windows[WMAPRO_BLOCK_SIZES];   ///< windows for the different block sizes
190 
191     /* frame size dependent frame information (set during initialization) */
192     uint32_t         decode_flags;                  ///< used compression features
193     uint8_t          len_prefix;                    ///< frame is prefixed with its length
194     uint8_t          dynamic_range_compression;     ///< frame contains DRC data
195     uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
196     uint16_t         samples_per_frame;             ///< number of samples to output
197     uint16_t         trim_start;                    ///< number of samples to skip at start
198     uint16_t         trim_end;                      ///< number of samples to skip at end
199     uint16_t         log2_frame_size;               ///< av_log2(block_align) + 4, computed in decode_init()
200     int8_t           lfe_channel;                   ///< lfe channel index
201     uint8_t          max_num_subframes;             ///< 1 << ((decode_flags & 0x38) >> 3), computed in decode_init()
202     uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
203     uint8_t          max_subframe_len_bit;          ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
204     uint16_t         min_samples_per_subframe;      ///< samples_per_frame / max_num_subframes
205     int8_t           num_sfb[WMAPRO_BLOCK_SIZES];   ///< scale factor bands per block size
206     int16_t          sfb_offsets[WMAPRO_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
207     int8_t           sf_offsets[WMAPRO_BLOCK_SIZES][WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
208     int16_t          subwoofer_cutoffs[WMAPRO_BLOCK_SIZES]; ///< subwoofer cutoff values
209 
210     /* packet decode state */
211     GetBitContext    pgb;                           ///< bitstream reader context for the packet
212     int              next_packet_start;             ///< start offset of the next wma packet in the demuxer packet
213     uint8_t          packet_offset;                 ///< frame offset in the packet
214     uint8_t          packet_sequence_number;        ///< current packet number
215     int              num_saved_bits;                ///< saved number of bits
216     int              frame_offset;                  ///< frame offset in the bit reservoir
217     int              subframe_offset;               ///< subframe offset in the bit reservoir
218     uint8_t          packet_loss;                   ///< set in case of bitstream error
219     uint8_t          packet_done;                   ///< set when a packet is fully decoded
220     uint8_t          eof_done;                      ///< set when EOF reached and extra subframe is written (XMA1/2)
221 
222     /* frame decode state */
223     uint32_t         frame_num;                     ///< current frame number (not used for decoding)
224     GetBitContext    gb;                            ///< bitstream reader context
225     int              buf_bit_size;                  ///< buffer size in bits
226     uint8_t          drc_gain;                      ///< gain for the DRC tool
227     int8_t           skip_frame;                    ///< skip output step
228     int8_t           parsed_all_subframes;          ///< all subframes decoded?
229     uint8_t          skip_packets;                  ///< packets to skip to find next packet in a stream (XMA1/2)
230 
231     /* subframe/block decode state */
232     int16_t          subframe_len;                  ///< current subframe length
233     int8_t           nb_channels;                   ///< number of channels in stream (XMA1/2)
234     int8_t           channels_for_cur_subframe;     ///< number of channels that contain the subframe
235     int8_t           channel_indexes_for_cur_subframe[WMAPRO_MAX_CHANNELS]; ///< NOTE(review): presumably the indexes of the channels that contain the current subframe - confirm
236     int8_t           num_bands;                     ///< number of scale factor bands
237     int8_t           transmit_num_vec_coeffs;       ///< number of vector coded coefficients is part of the bitstream
238     int16_t*         cur_sfb_offsets;               ///< sfb offsets for the current block
239     uint8_t          table_idx;                     ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
240     int8_t           esc_len;                       ///< length of escaped coefficients
241 
242     uint8_t          num_chgroups;                  ///< number of channel groups
243     WMAProChannelGrp chgroup[WMAPRO_MAX_CHANNELS];  ///< channel group information
244 
245     WMAProChannelCtx channel[WMAPRO_MAX_CHANNELS];  ///< per channel data
246 } WMAProDecodeCtx;
247 
248 typedef struct XMADecodeCtx {
249     WMAProDecodeCtx xma[XMA_MAX_STREAMS];       ///< one WMAPro decoder context per XMA stream
250     AVFrame *frames[XMA_MAX_STREAMS];           ///< one frame pointer per XMA stream
251     int current_stream;                         ///< NOTE(review): presumably the index of the stream being decoded - confirm
252     int num_streams;                            ///< NOTE(review): presumably the number of streams in use (at most XMA_MAX_STREAMS) - confirm
253     AVAudioFifo *samples[2][XMA_MAX_STREAMS];   ///< audio FIFOs, indexed [0..1][stream]; the meaning of the first index is not visible here
254     int start_channel[XMA_MAX_STREAMS];         ///< NOTE(review): presumably the first output channel of each stream - confirm
255     int trim_start, trim_end;                   ///< NOTE(review): presumably samples to drop at the start / end of the output - confirm
256     int flushed;                                ///< NOTE(review): presumably set once the decoder has been flushed - confirm
257 } XMADecodeCtx;
258 
259 /**
260  *@brief helper function to print the most important members of the context
261  *@param s context
262  */
dump_context(WMAProDecodeCtx * s)263 static av_cold void dump_context(WMAProDecodeCtx *s)
264 {
265 #define PRINT(a, b)     av_log(s->avctx, AV_LOG_DEBUG, " %s = %d\n", a, b);
266 #define PRINT_HEX(a, b) av_log(s->avctx, AV_LOG_DEBUG, " %s = %"PRIx32"\n", a, b);
267 
268     PRINT("ed sample bit depth", s->bits_per_sample);
269     PRINT_HEX("ed decode flags", s->decode_flags);
270     PRINT("samples per frame",   s->samples_per_frame);
271     PRINT("log2 frame size",     s->log2_frame_size);
272     PRINT("max num subframes",   s->max_num_subframes);
273     PRINT("len prefix",          s->len_prefix);
274     PRINT("num channels",        s->nb_channels);
275 }
276 
277 /**
278  *@brief Uninitialize the decoder and free all resources.
279  *@param avctx codec context
280  *@return 0 on success, < 0 otherwise
281  */
decode_end(WMAProDecodeCtx * s)282 static av_cold int decode_end(WMAProDecodeCtx *s)
283 {
284     int i;
285 
286     av_freep(&s->fdsp);
287 
288     for (i = 0; i < WMAPRO_BLOCK_SIZES; i++)
289         ff_mdct_end(&s->mdct_ctx[i]);
290 
291     return 0;
292 }
293 
wmapro_decode_end(AVCodecContext * avctx)294 static av_cold int wmapro_decode_end(AVCodecContext *avctx)
295 {
296     WMAProDecodeCtx *s = avctx->priv_data;
297 
298     decode_end(s);
299 
300     return 0;
301 }
302 
get_rate(AVCodecContext * avctx)303 static av_cold int get_rate(AVCodecContext *avctx)
304 {
305     if (avctx->codec_id != AV_CODEC_ID_WMAPRO) { // XXX: is this really only for XMA?
306         if (avctx->sample_rate > 44100)
307             return 48000;
308         else if (avctx->sample_rate > 32000)
309             return 44100;
310         else if (avctx->sample_rate > 24000)
311             return 32000;
312         return 24000;
313     }
314 
315     return avctx->sample_rate;
316 }
317 
decode_init_static(void)318 static av_cold void decode_init_static(void)
319 {
320     INIT_VLC_STATIC(&sf_vlc, SCALEVLCBITS, HUFF_SCALE_SIZE,
321                     scale_huffbits, 1, 1,
322                     scale_huffcodes, 2, 2, 616);
323     INIT_VLC_STATIC(&sf_rl_vlc, VLCBITS, HUFF_SCALE_RL_SIZE,
324                     scale_rl_huffbits, 1, 1,
325                     scale_rl_huffcodes, 4, 4, 1406);
326     INIT_VLC_STATIC(&coef_vlc[0], VLCBITS, HUFF_COEF0_SIZE,
327                     coef0_huffbits, 1, 1,
328                     coef0_huffcodes, 4, 4, 2108);
329     INIT_VLC_STATIC(&coef_vlc[1], VLCBITS, HUFF_COEF1_SIZE,
330                     coef1_huffbits, 1, 1,
331                     coef1_huffcodes, 4, 4, 3912);
332     INIT_VLC_STATIC(&vec4_vlc, VLCBITS, HUFF_VEC4_SIZE,
333                     vec4_huffbits, 1, 1,
334                     vec4_huffcodes, 2, 2, 604);
335     INIT_VLC_STATIC(&vec2_vlc, VLCBITS, HUFF_VEC2_SIZE,
336                     vec2_huffbits, 1, 1,
337                     vec2_huffcodes, 2, 2, 562);
338     INIT_VLC_STATIC(&vec1_vlc, VLCBITS, HUFF_VEC1_SIZE,
339                     vec1_huffbits, 1, 1,
340                     vec1_huffcodes, 2, 2, 562);
341 
342     /** calculate sine values for the decorrelation matrix */
343     for (int i = 0; i < 33; i++)
344         sin64[i] = sin(i * M_PI / 64.0);
345 
346     for (int i = WMAPRO_BLOCK_MIN_BITS; i <= WMAPRO_BLOCK_MAX_BITS; i++)
347         ff_init_ff_sine_windows(i);
348 }
349 
350 /**
351  *@brief Initialize the decoder.
352  *@param avctx codec context
353  *@return 0 on success, -1 otherwise
354  */
decode_init(WMAProDecodeCtx * s,AVCodecContext * avctx,int num_stream)355 static av_cold int decode_init(WMAProDecodeCtx *s, AVCodecContext *avctx, int num_stream)
356 {
357     static AVOnce init_static_once = AV_ONCE_INIT;
358     uint8_t *edata_ptr = avctx->extradata;
359     unsigned int channel_mask;
360     int i, bits, ret;
361     int log2_max_num_subframes;
362     int num_possible_block_sizes;
363 
364     if (avctx->codec_id == AV_CODEC_ID_XMA1 || avctx->codec_id == AV_CODEC_ID_XMA2)
365         avctx->block_align = 2048;
366 
367     if (!avctx->block_align) {
368         av_log(avctx, AV_LOG_ERROR, "block_align is not set\n");
369         return AVERROR(EINVAL);
370     }
371 
372     s->avctx = avctx;
373 
374     init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
375 
376     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
377 
378     /** dump the extradata */
379     av_log(avctx, AV_LOG_DEBUG, "extradata:\n");
380     for (i = 0; i < avctx->extradata_size; i++)
381         av_log(avctx, AV_LOG_DEBUG, "[%x] ", avctx->extradata[i]);
382     av_log(avctx, AV_LOG_DEBUG, "\n");
383 
384     if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size == 34) { /* XMA2WAVEFORMATEX */
385         s->decode_flags    = 0x10d6;
386         s->bits_per_sample = 16;
387         channel_mask       = 0; //AV_RL32(edata_ptr+2); /* not always in expected order */
388         if ((num_stream+1) * XMA_MAX_CHANNELS_STREAM > avctx->ch_layout.nb_channels) /* stream config is 2ch + 2ch + ... + 1/2ch */
389             s->nb_channels = 1;
390         else
391             s->nb_channels = 2;
392     } else if (avctx->codec_id == AV_CODEC_ID_XMA2) { /* XMA2WAVEFORMAT */
393         s->decode_flags    = 0x10d6;
394         s->bits_per_sample = 16;
395         channel_mask       = 0; /* would need to aggregate from all streams */
396         s->nb_channels = edata_ptr[32 + ((edata_ptr[0]==3)?0:8) + 4*num_stream + 0]; /* nth stream config */
397     } else if (avctx->codec_id == AV_CODEC_ID_XMA1) { /* XMAWAVEFORMAT */
398         s->decode_flags    = 0x10d6;
399         s->bits_per_sample = 16;
400         channel_mask       = 0; /* would need to aggregate from all streams */
401         s->nb_channels     = edata_ptr[8 + 20*num_stream + 17]; /* nth stream config */
402     } else if (avctx->codec_id == AV_CODEC_ID_WMAPRO && avctx->extradata_size >= 18) {
403         s->decode_flags    = AV_RL16(edata_ptr+14);
404         channel_mask       = AV_RL32(edata_ptr+2);
405         s->bits_per_sample = AV_RL16(edata_ptr);
406         s->nb_channels     = channel_mask ? av_popcount(channel_mask) : avctx->ch_layout.nb_channels;
407 
408         if (s->bits_per_sample > 32 || s->bits_per_sample < 1) {
409             avpriv_request_sample(avctx, "bits per sample is %d", s->bits_per_sample);
410             return AVERROR_PATCHWELCOME;
411         }
412     } else {
413         avpriv_request_sample(avctx, "Unknown extradata size");
414         return AVERROR_PATCHWELCOME;
415     }
416 
417     /** generic init */
418     s->log2_frame_size = av_log2(avctx->block_align) + 4;
419     if (s->log2_frame_size > 25) {
420         avpriv_request_sample(avctx, "Large block align");
421         return AVERROR_PATCHWELCOME;
422     }
423 
424     /** frame info */
425     s->skip_frame = 1; /* skip first frame */
426 
427     s->packet_loss = 1;
428     s->len_prefix  = (s->decode_flags & 0x40);
429 
430     /** get frame len */
431     if (avctx->codec_id == AV_CODEC_ID_WMAPRO) {
432         bits = ff_wma_get_frame_len_bits(avctx->sample_rate, 3, s->decode_flags);
433         if (bits > WMAPRO_BLOCK_MAX_BITS) {
434             avpriv_request_sample(avctx, "14-bit block sizes");
435             return AVERROR_PATCHWELCOME;
436         }
437         s->samples_per_frame = 1 << bits;
438     } else {
439         s->samples_per_frame = 512;
440     }
441 
442     /** subframe info */
443     log2_max_num_subframes       = ((s->decode_flags & 0x38) >> 3);
444     s->max_num_subframes         = 1 << log2_max_num_subframes;
445     if (s->max_num_subframes == 16 || s->max_num_subframes == 4)
446         s->max_subframe_len_bit = 1;
447     s->subframe_len_bits = av_log2(log2_max_num_subframes) + 1;
448 
449     num_possible_block_sizes     = log2_max_num_subframes + 1;
450     s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
451     s->dynamic_range_compression = (s->decode_flags & 0x80);
452 
453     if (s->max_num_subframes > MAX_SUBFRAMES) {
454         av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %"PRId8"\n",
455                s->max_num_subframes);
456         return AVERROR_INVALIDDATA;
457     }
458 
459     if (s->min_samples_per_subframe < WMAPRO_BLOCK_MIN_SIZE) {
460         av_log(avctx, AV_LOG_ERROR, "min_samples_per_subframe of %d too small\n",
461                s->min_samples_per_subframe);
462         return AVERROR_INVALIDDATA;
463     }
464 
465     if (s->avctx->sample_rate <= 0) {
466         av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
467         return AVERROR_INVALIDDATA;
468     }
469 
470     if (s->nb_channels <= 0) {
471         av_log(avctx, AV_LOG_ERROR, "invalid number of channels %d\n",
472                s->nb_channels);
473         return AVERROR_INVALIDDATA;
474     } else if (avctx->codec_id != AV_CODEC_ID_WMAPRO && s->nb_channels > XMA_MAX_CHANNELS_STREAM) {
475         av_log(avctx, AV_LOG_ERROR, "invalid number of channels per XMA stream %d\n",
476                s->nb_channels);
477         return AVERROR_INVALIDDATA;
478     } else if (s->nb_channels > WMAPRO_MAX_CHANNELS || s->nb_channels > avctx->ch_layout.nb_channels) {
479         avpriv_request_sample(avctx,
480                               "More than %d channels", WMAPRO_MAX_CHANNELS);
481         return AVERROR_PATCHWELCOME;
482     }
483 
484     /** init previous block len */
485     for (i = 0; i < s->nb_channels; i++)
486         s->channel[i].prev_block_len = s->samples_per_frame;
487 
488     /** extract lfe channel position */
489     s->lfe_channel = -1;
490 
491     if (channel_mask & 8) {
492         unsigned int mask;
493         for (mask = 1; mask < 16; mask <<= 1) {
494             if (channel_mask & mask)
495                 ++s->lfe_channel;
496         }
497     }
498 
499     /** calculate number of scale factor bands and their offsets
500         for every possible block size */
501     for (i = 0; i < num_possible_block_sizes; i++) {
502         int subframe_len = s->samples_per_frame >> i;
503         int x;
504         int band = 1;
505         int rate = get_rate(avctx);
506 
507         s->sfb_offsets[i][0] = 0;
508 
509         for (x = 0; x < MAX_BANDS-1 && s->sfb_offsets[i][band - 1] < subframe_len; x++) {
510             int offset = (subframe_len * 2 * critical_freq[x]) / rate + 2;
511             offset &= ~3;
512             if (offset > s->sfb_offsets[i][band - 1])
513                 s->sfb_offsets[i][band++] = offset;
514 
515             if (offset >= subframe_len)
516                 break;
517         }
518         s->sfb_offsets[i][band - 1] = subframe_len;
519         s->num_sfb[i]               = band - 1;
520         if (s->num_sfb[i] <= 0) {
521             av_log(avctx, AV_LOG_ERROR, "num_sfb invalid\n");
522             return AVERROR_INVALIDDATA;
523         }
524     }
525 
526 
527     /** Scale factors can be shared between blocks of different size
528         as every block has a different scale factor band layout.
529         The matrix sf_offsets is needed to find the correct scale factor.
530      */
531 
532     for (i = 0; i < num_possible_block_sizes; i++) {
533         int b;
534         for (b = 0; b < s->num_sfb[i]; b++) {
535             int x;
536             int offset = ((s->sfb_offsets[i][b]
537                            + s->sfb_offsets[i][b + 1] - 1) << i) >> 1;
538             for (x = 0; x < num_possible_block_sizes; x++) {
539                 int v = 0;
540                 while (s->sfb_offsets[x][v + 1] << x < offset) {
541                     v++;
542                     av_assert0(v < MAX_BANDS);
543                 }
544                 s->sf_offsets[i][x][b] = v;
545             }
546         }
547     }
548 
549     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
550     if (!s->fdsp)
551         return AVERROR(ENOMEM);
552 
553     /** init MDCT, FIXME: only init needed sizes */
554     for (int i = 0; i < WMAPRO_BLOCK_SIZES; i++) {
555         ret = ff_mdct_init(&s->mdct_ctx[i], WMAPRO_BLOCK_MIN_BITS + 1 + i, 1,
556                            1.0 / (1 << (WMAPRO_BLOCK_MIN_BITS + i - 1))
557                            / (1ll << (s->bits_per_sample - 1)));
558         if (ret < 0)
559             return ret;
560     }
561 
562     /** init MDCT windows: simple sine window */
563     for (i = 0; i < WMAPRO_BLOCK_SIZES; i++) {
564         const int win_idx = WMAPRO_BLOCK_MAX_BITS - i;
565         s->windows[WMAPRO_BLOCK_SIZES - i - 1] = ff_sine_windows[win_idx];
566     }
567 
568     /** calculate subwoofer cutoff values */
569     for (i = 0; i < num_possible_block_sizes; i++) {
570         int block_size = s->samples_per_frame >> i;
571         int cutoff = (440*block_size + 3LL * (s->avctx->sample_rate >> 1) - 1)
572                      / s->avctx->sample_rate;
573         s->subwoofer_cutoffs[i] = av_clip(cutoff, 4, block_size);
574     }
575 
576     if (avctx->debug & FF_DEBUG_BITSTREAM)
577         dump_context(s);
578 
579     if (avctx->codec_id == AV_CODEC_ID_WMAPRO) {
580         if (channel_mask) {
581             av_channel_layout_uninit(&avctx->ch_layout);
582             av_channel_layout_from_mask(&avctx->ch_layout, channel_mask);
583         } else
584             avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
585     }
586 
587     ff_thread_once(&init_static_once, decode_init_static);
588 
589     return 0;
590 }
591 
592 /**
593  *@brief Initialize the decoder.
594  *@param avctx codec context
595  *@return 0 on success, -1 otherwise
596  */
wmapro_decode_init(AVCodecContext * avctx)597 static av_cold int wmapro_decode_init(AVCodecContext *avctx)
598 {
599     WMAProDecodeCtx *s = avctx->priv_data;
600 
601     return decode_init(s, avctx, 0);
602 }
603 
604 /**
605  *@brief Decode the subframe length.
606  *@param s context
607  *@param offset sample offset in the frame
608  *@return decoded subframe length on success, < 0 in case of an error
609  */
decode_subframe_length(WMAProDecodeCtx * s,int offset)610 static int decode_subframe_length(WMAProDecodeCtx *s, int offset)
611 {
612     int frame_len_shift = 0;
613     int subframe_len;
614 
615     /** no need to read from the bitstream when only one length is possible */
616     if (offset == s->samples_per_frame - s->min_samples_per_subframe)
617         return s->min_samples_per_subframe;
618 
619     if (get_bits_left(&s->gb) < 1)
620         return AVERROR_INVALIDDATA;
621 
622     /** 1 bit indicates if the subframe is of maximum length */
623     if (s->max_subframe_len_bit) {
624         if (get_bits1(&s->gb))
625             frame_len_shift = 1 + get_bits(&s->gb, s->subframe_len_bits-1);
626     } else
627         frame_len_shift = get_bits(&s->gb, s->subframe_len_bits);
628 
629     subframe_len = s->samples_per_frame >> frame_len_shift;
630 
631     /** sanity check the length */
632     if (subframe_len < s->min_samples_per_subframe ||
633         subframe_len > s->samples_per_frame) {
634         av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
635                subframe_len);
636         return AVERROR_INVALIDDATA;
637     }
638     return subframe_len;
639 }
640 
641 /**
642  *@brief Decode how the data in the frame is split into subframes.
643  *       Every WMA frame contains the encoded data for a fixed number of
644  *       samples per channel. The data for every channel might be split
645  *       into several subframes. This function will reconstruct the list of
646  *       subframes for every channel.
647  *
648  *       If the subframes are not evenly split, the algorithm estimates the
649  *       channels with the lowest number of total samples.
650  *       Afterwards, for each of these channels a bit is read from the
651  *       bitstream that indicates if the channel contains a subframe with the
652  *       next subframe size that is going to be read from the bitstream or not.
653  *       If a channel contains such a subframe, the subframe size gets added to
654  *       the channel's subframe list.
655  *       The algorithm repeats these steps until the frame is properly divided
656  *       between the individual channels.
657  *
658  *@param s context
659  *@return 0 on success, < 0 in case of an error
660  */
decode_tilehdr(WMAProDecodeCtx * s)661 static int decode_tilehdr(WMAProDecodeCtx *s)
662 {
663     uint16_t num_samples[WMAPRO_MAX_CHANNELS] = { 0 };/**< sum of samples for all currently known subframes of a channel */
664     uint8_t  contains_subframe[WMAPRO_MAX_CHANNELS];  /**< flag indicating if a channel contains the current subframe */
665     int channels_for_cur_subframe = s->nb_channels;   /**< number of channels that contain the current subframe */
666     int fixed_channel_layout = 0;                     /**< flag indicating that all channels use the same subframe offsets and sizes */
667     int min_channel_len = 0;                          /**< smallest sum of samples (channels with this length will be processed first) */
668     int c;
669 
670     /* Should never consume more than 3073 bits (256 iterations for the
671      * while loop when always the minimum amount of 128 samples is subtracted
672      * from missing samples in the 8 channel case).
673      * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
674      */
675 
676     /** reset tiling information */
677     for (c = 0; c < s->nb_channels; c++)
678         s->channel[c].num_subframes = 0;
679 
680     if (s->max_num_subframes == 1 || get_bits1(&s->gb))
681         fixed_channel_layout = 1;
682 
683     /** loop until the frame data is split between the subframes */
684     do {
685         int subframe_len;
686 
687         /** check which channels contain the subframe */
688         for (c = 0; c < s->nb_channels; c++) {
689             if (num_samples[c] == min_channel_len) {
690                 if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
691                    (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe))
692                     contains_subframe[c] = 1;
693                 else
694                     contains_subframe[c] = get_bits1(&s->gb);
695             } else
696                 contains_subframe[c] = 0;
697         }
698 
699         /** get subframe length, subframe_len == 0 is not allowed */
700         if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
701             return AVERROR_INVALIDDATA;
702 
703         /** add subframes to the individual channels and find new min_channel_len */
704         min_channel_len += subframe_len;
705         for (c = 0; c < s->nb_channels; c++) {
706             WMAProChannelCtx* chan = &s->channel[c];
707 
708             if (contains_subframe[c]) {
709                 if (chan->num_subframes >= MAX_SUBFRAMES) {
710                     av_log(s->avctx, AV_LOG_ERROR,
711                            "broken frame: num subframes > 31\n");
712                     return AVERROR_INVALIDDATA;
713                 }
714                 chan->subframe_len[chan->num_subframes] = subframe_len;
715                 num_samples[c] += subframe_len;
716                 ++chan->num_subframes;
717                 if (num_samples[c] > s->samples_per_frame) {
718                     av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
719                            "channel len > samples_per_frame\n");
720                     return AVERROR_INVALIDDATA;
721                 }
722             } else if (num_samples[c] <= min_channel_len) {
723                 if (num_samples[c] < min_channel_len) {
724                     channels_for_cur_subframe = 0;
725                     min_channel_len = num_samples[c];
726                 }
727                 ++channels_for_cur_subframe;
728             }
729         }
730     } while (min_channel_len < s->samples_per_frame);
731 
732     for (c = 0; c < s->nb_channels; c++) {
733         int i;
734         int offset = 0;
735         for (i = 0; i < s->channel[c].num_subframes; i++) {
736             ff_dlog(s->avctx, "frame[%"PRIu32"] channel[%i] subframe[%i]"
737                     " len %i\n", s->frame_num, c, i,
738                     s->channel[c].subframe_len[i]);
739             s->channel[c].subframe_offset[i] = offset;
740             offset += s->channel[c].subframe_len[i];
741         }
742     }
743 
744     return 0;
745 }
746 
747 /**
748  *@brief Calculate a decorrelation matrix from the bitstream parameters.
749  *@param s codec context
750  *@param chgroup channel group for which the matrix needs to be calculated
751  */
decode_decorrelation_matrix(WMAProDecodeCtx * s,WMAProChannelGrp * chgroup)752 static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
753                                         WMAProChannelGrp *chgroup)
754 {
755     int i;
756     int offset = 0;
757     int8_t rotation_offset[WMAPRO_MAX_CHANNELS * WMAPRO_MAX_CHANNELS];
758     memset(chgroup->decorrelation_matrix, 0, s->nb_channels *
759            s->nb_channels * sizeof(*chgroup->decorrelation_matrix));
760 
761     for (i = 0; i < chgroup->num_channels * (chgroup->num_channels - 1) >> 1; i++)
762         rotation_offset[i] = get_bits(&s->gb, 6);
763 
764     for (i = 0; i < chgroup->num_channels; i++)
765         chgroup->decorrelation_matrix[chgroup->num_channels * i + i] =
766             get_bits1(&s->gb) ? 1.0 : -1.0;
767 
768     for (i = 1; i < chgroup->num_channels; i++) {
769         int x;
770         for (x = 0; x < i; x++) {
771             int y;
772             for (y = 0; y < i + 1; y++) {
773                 float v1 = chgroup->decorrelation_matrix[x * chgroup->num_channels + y];
774                 float v2 = chgroup->decorrelation_matrix[i * chgroup->num_channels + y];
775                 int n = rotation_offset[offset + x];
776                 float sinv;
777                 float cosv;
778 
779                 if (n < 32) {
780                     sinv = sin64[n];
781                     cosv = sin64[32 - n];
782                 } else {
783                     sinv =  sin64[64 -  n];
784                     cosv = -sin64[n  - 32];
785                 }
786 
787                 chgroup->decorrelation_matrix[y + x * chgroup->num_channels] =
788                                                (v1 * sinv) - (v2 * cosv);
789                 chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
790                                                (v1 * cosv) + (v2 * sinv);
791             }
792         }
793         offset += i;
794     }
795 }
796 
797 /**
798  *@brief Decode channel transformation parameters
799  *@param s codec context
800  *@return >= 0 in case of success, < 0 in case of bitstream errors
801  */
decode_channel_transform(WMAProDecodeCtx * s)802 static int decode_channel_transform(WMAProDecodeCtx* s)
803 {
804     int i;
805     /* should never consume more than 1921 bits for the 8 channel case
806      * 1 + MAX_CHANNELS * (MAX_CHANNELS + 2 + 3 * MAX_CHANNELS * MAX_CHANNELS
807      * + MAX_CHANNELS + MAX_BANDS + 1)
808      */
809 
810     /** in the one channel case channel transforms are pointless */
811     s->num_chgroups = 0;
812     if (s->nb_channels > 1) {
813         int remaining_channels = s->channels_for_cur_subframe;
814 
815         if (get_bits1(&s->gb)) {
816             avpriv_request_sample(s->avctx,
817                                   "Channel transform bit");
818             return AVERROR_PATCHWELCOME;
819         }
820 
821         for (s->num_chgroups = 0; remaining_channels &&
822              s->num_chgroups < s->channels_for_cur_subframe; s->num_chgroups++) {
823             WMAProChannelGrp* chgroup = &s->chgroup[s->num_chgroups];
824             float** channel_data = chgroup->channel_data;
825             chgroup->num_channels = 0;
826             chgroup->transform = 0;
827 
828             /** decode channel mask */
829             if (remaining_channels > 2) {
830                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
831                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
832                     if (!s->channel[channel_idx].grouped
833                         && get_bits1(&s->gb)) {
834                         ++chgroup->num_channels;
835                         s->channel[channel_idx].grouped = 1;
836                         *channel_data++ = s->channel[channel_idx].coeffs;
837                     }
838                 }
839             } else {
840                 chgroup->num_channels = remaining_channels;
841                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
842                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
843                     if (!s->channel[channel_idx].grouped)
844                         *channel_data++ = s->channel[channel_idx].coeffs;
845                     s->channel[channel_idx].grouped = 1;
846                 }
847             }
848 
849             /** decode transform type */
850             if (chgroup->num_channels == 2) {
851                 if (get_bits1(&s->gb)) {
852                     if (get_bits1(&s->gb)) {
853                         avpriv_request_sample(s->avctx,
854                                               "Unknown channel transform type");
855                         return AVERROR_PATCHWELCOME;
856                     }
857                 } else {
858                     chgroup->transform = 1;
859                     if (s->nb_channels == 2) {
860                         chgroup->decorrelation_matrix[0] =  1.0;
861                         chgroup->decorrelation_matrix[1] = -1.0;
862                         chgroup->decorrelation_matrix[2] =  1.0;
863                         chgroup->decorrelation_matrix[3] =  1.0;
864                     } else {
865                         /** cos(pi/4) */
866                         chgroup->decorrelation_matrix[0] =  0.70703125;
867                         chgroup->decorrelation_matrix[1] = -0.70703125;
868                         chgroup->decorrelation_matrix[2] =  0.70703125;
869                         chgroup->decorrelation_matrix[3] =  0.70703125;
870                     }
871                 }
872             } else if (chgroup->num_channels > 2) {
873                 if (get_bits1(&s->gb)) {
874                     chgroup->transform = 1;
875                     if (get_bits1(&s->gb)) {
876                         decode_decorrelation_matrix(s, chgroup);
877                     } else {
878                         /** FIXME: more than 6 coupled channels not supported */
879                         if (chgroup->num_channels > 6) {
880                             avpriv_request_sample(s->avctx,
881                                                   "Coupled channels > 6");
882                         } else {
883                             memcpy(chgroup->decorrelation_matrix,
884                                    default_decorrelation[chgroup->num_channels],
885                                    chgroup->num_channels * chgroup->num_channels *
886                                    sizeof(*chgroup->decorrelation_matrix));
887                         }
888                     }
889                 }
890             }
891 
892             /** decode transform on / off */
893             if (chgroup->transform) {
894                 if (!get_bits1(&s->gb)) {
895                     int i;
896                     /** transform can be enabled for individual bands */
897                     for (i = 0; i < s->num_bands; i++) {
898                         chgroup->transform_band[i] = get_bits1(&s->gb);
899                     }
900                 } else {
901                     memset(chgroup->transform_band, 1, s->num_bands);
902                 }
903             }
904             remaining_channels -= chgroup->num_channels;
905         }
906     }
907     return 0;
908 }
909 
910 /**
911  *@brief Extract the coefficients from the bitstream.
912  *@param s codec context
913  *@param c current channel number
914  *@return 0 on success, < 0 in case of bitstream errors
915  */
decode_coeffs(WMAProDecodeCtx * s,int c)916 static int decode_coeffs(WMAProDecodeCtx *s, int c)
917 {
918     /* Integers 0..15 as single-precision floats.  The table saves a
919        costly int to float conversion, and storing the values as
920        integers allows fast sign-flipping. */
921     static const uint32_t fval_tab[16] = {
922         0x00000000, 0x3f800000, 0x40000000, 0x40400000,
923         0x40800000, 0x40a00000, 0x40c00000, 0x40e00000,
924         0x41000000, 0x41100000, 0x41200000, 0x41300000,
925         0x41400000, 0x41500000, 0x41600000, 0x41700000,
926     };
927     int vlctable;
928     VLC* vlc;
929     WMAProChannelCtx* ci = &s->channel[c];
930     int rl_mode = 0;
931     int cur_coeff = 0;
932     int num_zeros = 0;
933     const uint16_t* run;
934     const float* level;
935 
936     ff_dlog(s->avctx, "decode coefficients for channel %i\n", c);
937 
938     vlctable = get_bits1(&s->gb);
939     vlc = &coef_vlc[vlctable];
940 
941     if (vlctable) {
942         run = coef1_run;
943         level = coef1_level;
944     } else {
945         run = coef0_run;
946         level = coef0_level;
947     }
948 
949     /** decode vector coefficients (consumes up to 167 bits per iteration for
950       4 vector coded large values) */
951     while ((s->transmit_num_vec_coeffs || !rl_mode) &&
952            (cur_coeff + 3 < ci->num_vec_coeffs)) {
953         uint32_t vals[4];
954         int i;
955         unsigned int idx;
956 
957         idx = get_vlc2(&s->gb, vec4_vlc.table, VLCBITS, VEC4MAXDEPTH);
958 
959         if (idx == HUFF_VEC4_SIZE - 1) {
960             for (i = 0; i < 4; i += 2) {
961                 idx = get_vlc2(&s->gb, vec2_vlc.table, VLCBITS, VEC2MAXDEPTH);
962                 if (idx == HUFF_VEC2_SIZE - 1) {
963                     uint32_t v0, v1;
964                     v0 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
965                     if (v0 == HUFF_VEC1_SIZE - 1)
966                         v0 += ff_wma_get_large_val(&s->gb);
967                     v1 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
968                     if (v1 == HUFF_VEC1_SIZE - 1)
969                         v1 += ff_wma_get_large_val(&s->gb);
970                     vals[i  ] = av_float2int(v0);
971                     vals[i+1] = av_float2int(v1);
972                 } else {
973                     vals[i]   = fval_tab[symbol_to_vec2[idx] >> 4 ];
974                     vals[i+1] = fval_tab[symbol_to_vec2[idx] & 0xF];
975                 }
976             }
977         } else {
978             vals[0] = fval_tab[ symbol_to_vec4[idx] >> 12      ];
979             vals[1] = fval_tab[(symbol_to_vec4[idx] >> 8) & 0xF];
980             vals[2] = fval_tab[(symbol_to_vec4[idx] >> 4) & 0xF];
981             vals[3] = fval_tab[ symbol_to_vec4[idx]       & 0xF];
982         }
983 
984         /** decode sign */
985         for (i = 0; i < 4; i++) {
986             if (vals[i]) {
987                 uint32_t sign = get_bits1(&s->gb) - 1;
988                 AV_WN32A(&ci->coeffs[cur_coeff], vals[i] ^ sign << 31);
989                 num_zeros = 0;
990             } else {
991                 ci->coeffs[cur_coeff] = 0;
992                 /** switch to run level mode when subframe_len / 128 zeros
993                     were found in a row */
994                 rl_mode |= (++num_zeros > s->subframe_len >> 8);
995             }
996             ++cur_coeff;
997         }
998     }
999 
1000     /** decode run level coded coefficients */
1001     if (cur_coeff < s->subframe_len) {
1002         int ret;
1003 
1004         memset(&ci->coeffs[cur_coeff], 0,
1005                sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff));
1006         ret = ff_wma_run_level_decode(s->avctx, &s->gb, vlc,
1007                                       level, run, 1, ci->coeffs,
1008                                       cur_coeff, s->subframe_len,
1009                                       s->subframe_len, s->esc_len, 0);
1010         if (ret < 0)
1011             return ret;
1012     }
1013 
1014     return 0;
1015 }
1016 
1017 /**
1018  *@brief Extract scale factors from the bitstream.
1019  *@param s codec context
1020  *@return 0 on success, < 0 in case of bitstream errors
1021  */
decode_scale_factors(WMAProDecodeCtx * s)1022 static int decode_scale_factors(WMAProDecodeCtx* s)
1023 {
1024     int i;
1025 
1026     /** should never consume more than 5344 bits
1027      *  MAX_CHANNELS * (1 +  MAX_BANDS * 23)
1028      */
1029 
1030     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1031         int c = s->channel_indexes_for_cur_subframe[i];
1032         int* sf;
1033         int* sf_end;
1034         s->channel[c].scale_factors = s->channel[c].saved_scale_factors[!s->channel[c].scale_factor_idx];
1035         sf_end = s->channel[c].scale_factors + s->num_bands;
1036 
1037         /** resample scale factors for the new block size
1038          *  as the scale factors might need to be resampled several times
1039          *  before some  new values are transmitted, a backup of the last
1040          *  transmitted scale factors is kept in saved_scale_factors
1041          */
1042         if (s->channel[c].reuse_sf) {
1043             const int8_t* sf_offsets = s->sf_offsets[s->table_idx][s->channel[c].table_idx];
1044             int b;
1045             for (b = 0; b < s->num_bands; b++)
1046                 s->channel[c].scale_factors[b] =
1047                     s->channel[c].saved_scale_factors[s->channel[c].scale_factor_idx][*sf_offsets++];
1048         }
1049 
1050         if (!s->channel[c].cur_subframe || get_bits1(&s->gb)) {
1051 
1052             if (!s->channel[c].reuse_sf) {
1053                 int val;
1054                 /** decode DPCM coded scale factors */
1055                 s->channel[c].scale_factor_step = get_bits(&s->gb, 2) + 1;
1056                 val = 45 / s->channel[c].scale_factor_step;
1057                 for (sf = s->channel[c].scale_factors; sf < sf_end; sf++) {
1058                     val += get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS, SCALEMAXDEPTH) - 60;
1059                     *sf = val;
1060                 }
1061             } else {
1062                 int i;
1063                 /** run level decode differences to the resampled factors */
1064                 for (i = 0; i < s->num_bands; i++) {
1065                     int idx;
1066                     int skip;
1067                     int val;
1068                     int sign;
1069 
1070                     idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS, SCALERLMAXDEPTH);
1071 
1072                     if (!idx) {
1073                         uint32_t code = get_bits(&s->gb, 14);
1074                         val  =  code >> 6;
1075                         sign = (code & 1) - 1;
1076                         skip = (code & 0x3f) >> 1;
1077                     } else if (idx == 1) {
1078                         break;
1079                     } else {
1080                         skip = scale_rl_run[idx];
1081                         val  = scale_rl_level[idx];
1082                         sign = get_bits1(&s->gb)-1;
1083                     }
1084 
1085                     i += skip;
1086                     if (i >= s->num_bands) {
1087                         av_log(s->avctx, AV_LOG_ERROR,
1088                                "invalid scale factor coding\n");
1089                         return AVERROR_INVALIDDATA;
1090                     }
1091                     s->channel[c].scale_factors[i] += (val ^ sign) - sign;
1092                 }
1093             }
1094             /** swap buffers */
1095             s->channel[c].scale_factor_idx = !s->channel[c].scale_factor_idx;
1096             s->channel[c].table_idx = s->table_idx;
1097             s->channel[c].reuse_sf  = 1;
1098         }
1099 
1100         /** calculate new scale factor maximum */
1101         s->channel[c].max_scale_factor = s->channel[c].scale_factors[0];
1102         for (sf = s->channel[c].scale_factors + 1; sf < sf_end; sf++) {
1103             s->channel[c].max_scale_factor =
1104                 FFMAX(s->channel[c].max_scale_factor, *sf);
1105         }
1106 
1107     }
1108     return 0;
1109 }
1110 
1111 /**
1112  *@brief Reconstruct the individual channel data.
1113  *@param s codec context
1114  */
inverse_channel_transform(WMAProDecodeCtx * s)1115 static void inverse_channel_transform(WMAProDecodeCtx *s)
1116 {
1117     int i;
1118 
1119     for (i = 0; i < s->num_chgroups; i++) {
1120         if (s->chgroup[i].transform) {
1121             float data[WMAPRO_MAX_CHANNELS];
1122             const int num_channels = s->chgroup[i].num_channels;
1123             float** ch_data = s->chgroup[i].channel_data;
1124             float** ch_end = ch_data + num_channels;
1125             const int8_t* tb = s->chgroup[i].transform_band;
1126             int16_t* sfb;
1127 
1128             /** multichannel decorrelation */
1129             for (sfb = s->cur_sfb_offsets;
1130                  sfb < s->cur_sfb_offsets + s->num_bands; sfb++) {
1131                 int y;
1132                 if (*tb++ == 1) {
1133                     /** multiply values with the decorrelation_matrix */
1134                     for (y = sfb[0]; y < FFMIN(sfb[1], s->subframe_len); y++) {
1135                         const float* mat = s->chgroup[i].decorrelation_matrix;
1136                         const float* data_end = data + num_channels;
1137                         float* data_ptr = data;
1138                         float** ch;
1139 
1140                         for (ch = ch_data; ch < ch_end; ch++)
1141                             *data_ptr++ = (*ch)[y];
1142 
1143                         for (ch = ch_data; ch < ch_end; ch++) {
1144                             float sum = 0;
1145                             data_ptr = data;
1146                             while (data_ptr < data_end)
1147                                 sum += *data_ptr++ * *mat++;
1148 
1149                             (*ch)[y] = sum;
1150                         }
1151                     }
1152                 } else if (s->nb_channels == 2) {
1153                     int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
1154                     s->fdsp->vector_fmul_scalar(ch_data[0] + sfb[0],
1155                                                ch_data[0] + sfb[0],
1156                                                181.0 / 128, len);
1157                     s->fdsp->vector_fmul_scalar(ch_data[1] + sfb[0],
1158                                                ch_data[1] + sfb[0],
1159                                                181.0 / 128, len);
1160                 }
1161             }
1162         }
1163     }
1164 }
1165 
1166 /**
1167  *@brief Apply sine window and reconstruct the output buffer.
1168  *@param s codec context
1169  */
wmapro_window(WMAProDecodeCtx * s)1170 static void wmapro_window(WMAProDecodeCtx *s)
1171 {
1172     int i;
1173     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1174         int c = s->channel_indexes_for_cur_subframe[i];
1175         const float* window;
1176         int winlen = s->channel[c].prev_block_len;
1177         float* start = s->channel[c].coeffs - (winlen >> 1);
1178 
1179         if (s->subframe_len < winlen) {
1180             start += (winlen - s->subframe_len) >> 1;
1181             winlen = s->subframe_len;
1182         }
1183 
1184         window = s->windows[av_log2(winlen) - WMAPRO_BLOCK_MIN_BITS];
1185 
1186         winlen >>= 1;
1187 
1188         s->fdsp->vector_fmul_window(start, start, start + winlen,
1189                                    window, winlen);
1190 
1191         s->channel[c].prev_block_len = s->subframe_len;
1192     }
1193 }
1194 
1195 /**
1196  *@brief Decode a single subframe (block).
1197  *@param s codec context
1198  *@return 0 on success, < 0 when decoding failed
1199  */
decode_subframe(WMAProDecodeCtx * s)1200 static int decode_subframe(WMAProDecodeCtx *s)
1201 {
1202     int offset = s->samples_per_frame;
1203     int subframe_len = s->samples_per_frame;
1204     int i;
1205     int total_samples   = s->samples_per_frame * s->nb_channels;
1206     int transmit_coeffs = 0;
1207     int cur_subwoofer_cutoff;
1208 
1209     s->subframe_offset = get_bits_count(&s->gb);
1210 
1211     /** reset channel context and find the next block offset and size
1212         == the next block of the channel with the smallest number of
1213         decoded samples
1214     */
1215     for (i = 0; i < s->nb_channels; i++) {
1216         s->channel[i].grouped = 0;
1217         if (offset > s->channel[i].decoded_samples) {
1218             offset = s->channel[i].decoded_samples;
1219             subframe_len =
1220                 s->channel[i].subframe_len[s->channel[i].cur_subframe];
1221         }
1222     }
1223 
1224     ff_dlog(s->avctx,
1225             "processing subframe with offset %i len %i\n", offset, subframe_len);
1226 
1227     /** get a list of all channels that contain the estimated block */
1228     s->channels_for_cur_subframe = 0;
1229     for (i = 0; i < s->nb_channels; i++) {
1230         const int cur_subframe = s->channel[i].cur_subframe;
1231         /** subtract already processed samples */
1232         total_samples -= s->channel[i].decoded_samples;
1233 
1234         /** and count if there are multiple subframes that match our profile */
1235         if (offset == s->channel[i].decoded_samples &&
1236             subframe_len == s->channel[i].subframe_len[cur_subframe]) {
1237             total_samples -= s->channel[i].subframe_len[cur_subframe];
1238             s->channel[i].decoded_samples +=
1239                 s->channel[i].subframe_len[cur_subframe];
1240             s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
1241             ++s->channels_for_cur_subframe;
1242         }
1243     }
1244 
1245     /** check if the frame will be complete after processing the
1246         estimated block */
1247     if (!total_samples)
1248         s->parsed_all_subframes = 1;
1249 
1250 
1251     ff_dlog(s->avctx, "subframe is part of %i channels\n",
1252             s->channels_for_cur_subframe);
1253 
1254     /** calculate number of scale factor bands and their offsets */
1255     s->table_idx         = av_log2(s->samples_per_frame/subframe_len);
1256     s->num_bands         = s->num_sfb[s->table_idx];
1257     s->cur_sfb_offsets   = s->sfb_offsets[s->table_idx];
1258     cur_subwoofer_cutoff = s->subwoofer_cutoffs[s->table_idx];
1259 
1260     /** configure the decoder for the current subframe */
1261     offset += s->samples_per_frame >> 1;
1262 
1263     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1264         int c = s->channel_indexes_for_cur_subframe[i];
1265 
1266         s->channel[c].coeffs = &s->channel[c].out[offset];
1267     }
1268 
1269     s->subframe_len = subframe_len;
1270     s->esc_len = av_log2(s->subframe_len - 1) + 1;
1271 
1272     /** skip extended header if any */
1273     if (get_bits1(&s->gb)) {
1274         int num_fill_bits;
1275         if (!(num_fill_bits = get_bits(&s->gb, 2))) {
1276             int len = get_bits(&s->gb, 4);
1277             num_fill_bits = get_bitsz(&s->gb, len) + 1;
1278         }
1279 
1280         if (num_fill_bits >= 0) {
1281             if (get_bits_count(&s->gb) + num_fill_bits > s->num_saved_bits) {
1282                 av_log(s->avctx, AV_LOG_ERROR, "invalid number of fill bits\n");
1283                 return AVERROR_INVALIDDATA;
1284             }
1285 
1286             skip_bits_long(&s->gb, num_fill_bits);
1287         }
1288     }
1289 
1290     /** no idea for what the following bit is used */
1291     if (get_bits1(&s->gb)) {
1292         avpriv_request_sample(s->avctx, "Reserved bit");
1293         return AVERROR_PATCHWELCOME;
1294     }
1295 
1296 
1297     if (decode_channel_transform(s) < 0)
1298         return AVERROR_INVALIDDATA;
1299 
1300 
1301     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1302         int c = s->channel_indexes_for_cur_subframe[i];
1303         if ((s->channel[c].transmit_coefs = get_bits1(&s->gb)))
1304             transmit_coeffs = 1;
1305     }
1306 
1307     av_assert0(s->subframe_len <= WMAPRO_BLOCK_MAX_SIZE);
1308     if (transmit_coeffs) {
1309         int step;
1310         int quant_step = 90 * s->bits_per_sample >> 4;
1311 
1312         /** decode number of vector coded coefficients */
1313         if ((s->transmit_num_vec_coeffs = get_bits1(&s->gb))) {
1314             int num_bits = av_log2((s->subframe_len + 3)/4) + 1;
1315             for (i = 0; i < s->channels_for_cur_subframe; i++) {
1316                 int c = s->channel_indexes_for_cur_subframe[i];
1317                 int num_vec_coeffs = get_bits(&s->gb, num_bits) << 2;
1318                 if (num_vec_coeffs > s->subframe_len) {
1319                     av_log(s->avctx, AV_LOG_ERROR, "num_vec_coeffs %d is too large\n", num_vec_coeffs);
1320                     return AVERROR_INVALIDDATA;
1321                 }
1322                 av_assert0(num_vec_coeffs + offset <= FF_ARRAY_ELEMS(s->channel[c].out));
1323                 s->channel[c].num_vec_coeffs = num_vec_coeffs;
1324             }
1325         } else {
1326             for (i = 0; i < s->channels_for_cur_subframe; i++) {
1327                 int c = s->channel_indexes_for_cur_subframe[i];
1328                 s->channel[c].num_vec_coeffs = s->subframe_len;
1329             }
1330         }
1331         /** decode quantization step */
1332         step = get_sbits(&s->gb, 6);
1333         quant_step += step;
1334         if (step == -32 || step == 31) {
1335             const int sign = (step == 31) - 1;
1336             int quant = 0;
1337             while (get_bits_count(&s->gb) + 5 < s->num_saved_bits &&
1338                    (step = get_bits(&s->gb, 5)) == 31) {
1339                 quant += 31;
1340             }
1341             quant_step += ((quant + step) ^ sign) - sign;
1342         }
1343         if (quant_step < 0) {
1344             av_log(s->avctx, AV_LOG_DEBUG, "negative quant step\n");
1345         }
1346 
1347         /** decode quantization step modifiers for every channel */
1348 
1349         if (s->channels_for_cur_subframe == 1) {
1350             s->channel[s->channel_indexes_for_cur_subframe[0]].quant_step = quant_step;
1351         } else {
1352             int modifier_len = get_bits(&s->gb, 3);
1353             for (i = 0; i < s->channels_for_cur_subframe; i++) {
1354                 int c = s->channel_indexes_for_cur_subframe[i];
1355                 s->channel[c].quant_step = quant_step;
1356                 if (get_bits1(&s->gb)) {
1357                     if (modifier_len) {
1358                         s->channel[c].quant_step += get_bits(&s->gb, modifier_len) + 1;
1359                     } else
1360                         ++s->channel[c].quant_step;
1361                 }
1362             }
1363         }
1364 
1365         /** decode scale factors */
1366         if (decode_scale_factors(s) < 0)
1367             return AVERROR_INVALIDDATA;
1368     }
1369 
1370     ff_dlog(s->avctx, "BITSTREAM: subframe header length was %i\n",
1371             get_bits_count(&s->gb) - s->subframe_offset);
1372 
1373     /** parse coefficients */
1374     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1375         int c = s->channel_indexes_for_cur_subframe[i];
1376         if (s->channel[c].transmit_coefs &&
1377             get_bits_count(&s->gb) < s->num_saved_bits) {
1378             decode_coeffs(s, c);
1379         } else
1380             memset(s->channel[c].coeffs, 0,
1381                    sizeof(*s->channel[c].coeffs) * subframe_len);
1382     }
1383 
1384     ff_dlog(s->avctx, "BITSTREAM: subframe length was %i\n",
1385             get_bits_count(&s->gb) - s->subframe_offset);
1386 
1387     if (transmit_coeffs) {
1388         FFTContext *mdct = &s->mdct_ctx[av_log2(subframe_len) - WMAPRO_BLOCK_MIN_BITS];
1389         /** reconstruct the per channel data */
1390         inverse_channel_transform(s);
1391         for (i = 0; i < s->channels_for_cur_subframe; i++) {
1392             int c = s->channel_indexes_for_cur_subframe[i];
1393             const int* sf = s->channel[c].scale_factors;
1394             int b;
1395 
1396             if (c == s->lfe_channel)
1397                 memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
1398                        (subframe_len - cur_subwoofer_cutoff));
1399 
1400             /** inverse quantization and rescaling */
1401             for (b = 0; b < s->num_bands; b++) {
1402                 const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
1403                 const int exp = s->channel[c].quant_step -
1404                             (s->channel[c].max_scale_factor - *sf++) *
1405                             s->channel[c].scale_factor_step;
1406                 const float quant = ff_exp10(exp / 20.0);
1407                 int start = s->cur_sfb_offsets[b];
1408                 s->fdsp->vector_fmul_scalar(s->tmp + start,
1409                                            s->channel[c].coeffs + start,
1410                                            quant, end - start);
1411             }
1412 
1413             /** apply imdct (imdct_half == DCTIV with reverse) */
1414             mdct->imdct_half(mdct, s->channel[c].coeffs, s->tmp);
1415         }
1416     }
1417 
1418     /** window and overlapp-add */
1419     wmapro_window(s);
1420 
1421     /** handled one subframe */
1422     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1423         int c = s->channel_indexes_for_cur_subframe[i];
1424         if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1425             av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
1426             return AVERROR_INVALIDDATA;
1427         }
1428         ++s->channel[c].cur_subframe;
1429     }
1430 
1431     return 0;
1432 }
1433 
1434 /**
1435  *@brief Decode one WMA frame.
1436  *@param s codec context
1437  *@return 0 if the trailer bit indicates that this is the last frame,
1438  *        1 if there are additional frames
1439  */
decode_frame(WMAProDecodeCtx * s,AVFrame * frame,int * got_frame_ptr)1440 static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr)
1441 {
1442     GetBitContext* gb = &s->gb;
1443     int more_frames = 0;
1444     int len = 0;
1445     int i;
1446 
1447     /** get frame length */
1448     if (s->len_prefix)
1449         len = get_bits(gb, s->log2_frame_size);
1450 
1451     ff_dlog(s->avctx, "decoding frame with length %x\n", len);
1452 
1453     /** decode tile information */
1454     if (decode_tilehdr(s)) {
1455         s->packet_loss = 1;
1456         return 0;
1457     }
1458 
1459     /** read postproc transform */
1460     if (s->nb_channels > 1 && get_bits1(gb)) {
1461         if (get_bits1(gb)) {
1462             for (i = 0; i < s->nb_channels * s->nb_channels; i++)
1463                 skip_bits(gb, 4);
1464         }
1465     }
1466 
1467     /** read drc info */
1468     if (s->dynamic_range_compression) {
1469         s->drc_gain = get_bits(gb, 8);
1470         ff_dlog(s->avctx, "drc_gain %i\n", s->drc_gain);
1471     }
1472 
1473     if (get_bits1(gb)) {
1474         if (get_bits1(gb))
1475             s->trim_start = get_bits(gb, av_log2(s->samples_per_frame * 2));
1476 
1477         if (get_bits1(gb))
1478             s->trim_end = get_bits(gb, av_log2(s->samples_per_frame * 2));
1479     } else {
1480         s->trim_start = s->trim_end = 0;
1481     }
1482 
1483     ff_dlog(s->avctx, "BITSTREAM: frame header length was %i\n",
1484             get_bits_count(gb) - s->frame_offset);
1485 
1486     /** reset subframe states */
1487     s->parsed_all_subframes = 0;
1488     for (i = 0; i < s->nb_channels; i++) {
1489         s->channel[i].decoded_samples = 0;
1490         s->channel[i].cur_subframe    = 0;
1491         s->channel[i].reuse_sf        = 0;
1492     }
1493 
1494     /** decode all subframes */
1495     while (!s->parsed_all_subframes) {
1496         if (decode_subframe(s) < 0) {
1497             s->packet_loss = 1;
1498             return 0;
1499         }
1500     }
1501 
1502     /** copy samples to the output buffer */
1503     for (i = 0; i < s->nb_channels; i++)
1504         memcpy(frame->extended_data[i], s->channel[i].out,
1505                s->samples_per_frame * sizeof(*s->channel[i].out));
1506 
1507     for (i = 0; i < s->nb_channels; i++) {
1508         /** reuse second half of the IMDCT output for the next frame */
1509         memcpy(&s->channel[i].out[0],
1510                &s->channel[i].out[s->samples_per_frame],
1511                s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
1512     }
1513 
1514     if (s->skip_frame) {
1515         s->skip_frame = 0;
1516         *got_frame_ptr = 0;
1517         av_frame_unref(frame);
1518     } else {
1519         *got_frame_ptr = 1;
1520     }
1521 
1522     if (s->len_prefix) {
1523         if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1524             /** FIXME: not sure if this is always an error */
1525             av_log(s->avctx, AV_LOG_ERROR,
1526                    "frame[%"PRIu32"] would have to skip %i bits\n",
1527                    s->frame_num,
1528                    len - (get_bits_count(gb) - s->frame_offset) - 1);
1529             s->packet_loss = 1;
1530             return 0;
1531         }
1532 
1533         /** skip the rest of the frame data */
1534         skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1535     } else {
1536         while (get_bits_count(gb) < s->num_saved_bits && get_bits1(gb) == 0) {
1537         }
1538     }
1539 
1540     /** decode trailer bit */
1541     more_frames = get_bits1(gb);
1542 
1543     ++s->frame_num;
1544     return more_frames;
1545 }
1546 
1547 /**
1548  *@brief Calculate remaining input buffer length.
1549  *@param s codec context
1550  *@param gb bitstream reader context
1551  *@return remaining size in bits
1552  */
remaining_bits(WMAProDecodeCtx * s,GetBitContext * gb)1553 static int remaining_bits(WMAProDecodeCtx *s, GetBitContext *gb)
1554 {
1555     return s->buf_bit_size - get_bits_count(gb);
1556 }
1557 
1558 /**
1559  *@brief Fill the bit reservoir with a (partial) frame.
1560  *@param s codec context
1561  *@param gb bitstream reader context
1562  *@param len length of the partial frame
1563  *@param append decides whether to reset the buffer or not
1564  */
save_bits(WMAProDecodeCtx * s,GetBitContext * gb,int len,int append)1565 static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
1566                       int append)
1567 {
1568     int buflen;
1569 
1570     /** when the frame data does not need to be concatenated, the input buffer
1571         is reset and additional bits from the previous frame are copied
1572         and skipped later so that a fast byte copy is possible */
1573 
1574     if (!append) {
1575         s->frame_offset = get_bits_count(gb) & 7;
1576         s->num_saved_bits = s->frame_offset;
1577         init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
1578         buflen = (s->num_saved_bits      + len + 7) >> 3;
1579     } else
1580         buflen = (put_bits_count(&s->pb) + len + 7) >> 3;
1581 
1582     if (len <= 0 || buflen > MAX_FRAMESIZE) {
1583         avpriv_request_sample(s->avctx, "Too small input buffer");
1584         s->packet_loss = 1;
1585         return;
1586     }
1587 
1588     av_assert0(len <= put_bits_left(&s->pb));
1589 
1590     s->num_saved_bits += len;
1591     if (!append) {
1592         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
1593                      s->num_saved_bits);
1594     } else {
1595         int align = 8 - (get_bits_count(gb) & 7);
1596         align = FFMIN(align, len);
1597         put_bits(&s->pb, align, get_bits(gb, align));
1598         len -= align;
1599         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
1600     }
1601     skip_bits_long(gb, len);
1602 
1603     {
1604         PutBitContext tmp = s->pb;
1605         flush_put_bits(&tmp);
1606     }
1607 
1608     init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1609     skip_bits(&s->gb, s->frame_offset);
1610 }
1611 
/**
 *@brief Decode the next frame of a packet (shared by the WMAPro and XMA paths).
 *
 * Three situations are handled:
 * - an empty packet: the first half of every channel's saved output buffer
 *   is emitted once, zero padded to a full frame
 * - the previous packet is done (or was lost): a new packet header is
 *   parsed, the bits that belong to the previous frame are appended to the
 *   bit reservoir and that frame is decoded
 * - otherwise: decoding continues with the next frame of the current packet
 *
 *@param avctx codec context
 *@param s decoder state of the stream the packet belongs to
 *@param frame output frame; samples are written through extended_data,
 *       no buffer is allocated here
 *@param got_frame_ptr set to 1 if frame holds decoded samples, 0 otherwise
 *@param avpkt input packet
 *@return number of whole bytes consumed by the packet reader, 0 for an
 *        empty packet, or a negative AVERROR code
 */
static int decode_packet(AVCodecContext *avctx, WMAProDecodeCtx *s,
                         AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
{
    GetBitContext* gb  = &s->pgb;
    const uint8_t* buf = avpkt->data;
    int buf_size       = avpkt->size;
    int num_bits_prev_frame;
    int packet_sequence_number;
    int ret;

    *got_frame_ptr = 0;

    if (!buf_size) {
        int i;

        /** Must output remaining samples after stream end. WMAPRO 5.1 created
         * by XWMA encoder don't though (maybe only 1/2ch streams need it). */
        s->packet_done = 0;
        /** the remaining samples are only output once */
        if (s->eof_done)
            return 0;

        /** clean output buffer and copy last IMDCT samples */
        for (i = 0; i < s->nb_channels; i++) {
            memset(frame->extended_data[i], 0,
            s->samples_per_frame * sizeof(*s->channel[i].out));

            memcpy(frame->extended_data[i], s->channel[i].out,
                   s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
        }

        s->eof_done = 1;
        s->packet_done = 1;
        *got_frame_ptr = 1;
        return 0;
    }
    else if (s->packet_done || s->packet_loss) {
        /** start of a new packet */
        s->packet_done = 0;

        /** sanity check for the buffer length */
        if (avctx->codec_id == AV_CODEC_ID_WMAPRO && buf_size < avctx->block_align) {
            av_log(avctx, AV_LOG_ERROR, "Input packet too small (%d < %d)\n",
                   buf_size, avctx->block_align);
            s->packet_loss = 1;
            return AVERROR_INVALIDDATA;
        }

        /** only the first block_align bytes are parsed as this packet, the
            number of surplus bytes is remembered in next_packet_start */
        if (avctx->codec_id == AV_CODEC_ID_WMAPRO) {
            s->next_packet_start = buf_size - avctx->block_align;
            buf_size = avctx->block_align;
        } else {
            s->next_packet_start = buf_size - FFMIN(buf_size, avctx->block_align);
            buf_size = FFMIN(buf_size, avctx->block_align);
        }
        s->buf_bit_size = buf_size << 3;

        /** parse packet header */
        ret = init_get_bits8(gb, buf, buf_size);
        if (ret < 0)
            return ret;
        if (avctx->codec_id != AV_CODEC_ID_XMA2) {
            /** 4 bit sequence number followed by 2 bits that are ignored */
            packet_sequence_number = get_bits(gb, 4);
            skip_bits(gb, 2);
        } else {
            /** XMA2 stores a 6 bit frame count here, it is only logged */
            int num_frames = get_bits(gb, 6);
            ff_dlog(avctx, "packet[%d]: number of frames %d\n", avctx->frame_number, num_frames);
            packet_sequence_number = 0;
        }

        /** get number of bits that need to be added to the previous frame */
        num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
        if (avctx->codec_id != AV_CODEC_ID_WMAPRO) {
            /** XMA headers: 3 ignored bits, then the 8 bit skip_packets value */
            skip_bits(gb, 3);
            s->skip_packets = get_bits(gb, 8);
            ff_dlog(avctx, "packet[%d]: skip packets %d\n", avctx->frame_number, s->skip_packets);
        }

        ff_dlog(avctx, "packet[%d]: nbpf %x\n", avctx->frame_number,
                num_bits_prev_frame);

        /** check for packet loss: the sequence number must be the
            previous one plus one, modulo 16 (WMAPRO only) */
        if (avctx->codec_id == AV_CODEC_ID_WMAPRO && !s->packet_loss &&
            ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
            s->packet_loss = 1;
            av_log(avctx, AV_LOG_ERROR,
                   "Packet loss detected! seq %"PRIx8" vs %x\n",
                   s->packet_sequence_number, packet_sequence_number);
        }
        s->packet_sequence_number = packet_sequence_number;

        if (num_bits_prev_frame > 0) {
            int remaining_packet_bits = s->buf_bit_size - get_bits_count(gb);
            /** the previous frame takes up the rest of this packet (or
                more): clamp to what is available and finish the packet */
            if (num_bits_prev_frame >= remaining_packet_bits) {
                num_bits_prev_frame = remaining_packet_bits;
                s->packet_done = 1;
            }

            /** append the previous frame data to the remaining data from the
                previous packet to create a full frame */
            save_bits(s, gb, num_bits_prev_frame, 1);
            ff_dlog(avctx, "accumulated %x bits of frame data\n",
                    s->num_saved_bits - s->frame_offset);

            /** decode the cross packet frame if it is valid */
            if (!s->packet_loss)
                decode_frame(s, frame, got_frame_ptr);
        } else if (s->num_saved_bits - s->frame_offset) {
            ff_dlog(avctx, "ignoring %x previously saved bits\n",
                    s->num_saved_bits - s->frame_offset);
        }

        if (s->packet_loss) {
            /** reset number of saved bits so that the decoder
                does not start to decode incomplete frames in the
                s->len_prefix == 0 case */
            s->num_saved_bits = 0;
            s->packet_loss = 0;
        }
    } else {
        /** continue with the next frame of the current packet */
        int frame_size;

        if (avpkt->size < s->next_packet_start) {
            s->packet_loss = 1;
            return AVERROR_INVALIDDATA;
        }

        s->buf_bit_size = (avpkt->size - s->next_packet_start) << 3;
        ret = init_get_bits8(gb, avpkt->data, avpkt->size - s->next_packet_start);
        if (ret < 0)
            return ret;
        /** resume at the bit offset stored at the end of the previous call */
        skip_bits(gb, s->packet_offset);
        /** with a length prefix a frame is only decoded when it lies
            completely inside the remaining packet data */
        if (s->len_prefix && remaining_bits(s, gb) > s->log2_frame_size &&
            (frame_size = show_bits(gb, s->log2_frame_size)) &&
            frame_size <= remaining_bits(s, gb)) {
            save_bits(s, gb, frame_size, 0);
            if (!s->packet_loss)
                s->packet_done = !decode_frame(s, frame, got_frame_ptr);
        } else if (!s->len_prefix
                   && s->num_saved_bits > get_bits_count(&s->gb)) {
            /** when the frames do not have a length prefix, we don't know
                the compressed length of the individual frames
                however, we know what part of a new packet belongs to the
                previous frame
                therefore we save the incoming packet first, then we append
                the "previous frame" data from the next packet so that
                we get a buffer that only contains full frames */
            s->packet_done = !decode_frame(s, frame, got_frame_ptr);
        } else {
            s->packet_done = 1;
        }
    }

    /** reading past the end of the packet is treated as packet loss */
    if (remaining_bits(s, gb) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Overread %d\n", -remaining_bits(s, gb));
        s->packet_loss = 1;
    }

    if (s->packet_done && !s->packet_loss &&
        remaining_bits(s, gb) > 0) {
        /** save the rest of the data so that it can be decoded
            with the next packet */
        save_bits(s, gb, remaining_bits(s, gb), 0);
    }

    /** remember the position inside the current byte for the next call */
    s->packet_offset = get_bits_count(gb) & 7;
    if (s->packet_loss)
        return AVERROR_INVALIDDATA;

    /** trimming is only applied here for WMAPRO */
    if (s->trim_start && avctx->codec_id == AV_CODEC_ID_WMAPRO) {
        if (s->trim_start < frame->nb_samples) {
            /** drop the leading samples by advancing every plane pointer;
                the factor 4 is presumably the size of one float sample
                (NOTE(review): confirm against the output sample format) */
            for (int ch = 0; ch < frame->ch_layout.nb_channels; ch++)
                frame->extended_data[ch] += s->trim_start * 4;

            frame->nb_samples -= s->trim_start;
        } else {
            /** nothing would be left of the frame, so no frame is output */
            *got_frame_ptr = 0;
        }

        s->trim_start = 0;
    }

    if (s->trim_end && avctx->codec_id == AV_CODEC_ID_WMAPRO) {
        if (s->trim_end < frame->nb_samples) {
            /** drop the trailing samples by shortening the frame */
            frame->nb_samples -= s->trim_end;
        } else {
            *got_frame_ptr = 0;
        }

        s->trim_end = 0;
    }

    /** number of whole bytes consumed by the packet reader */
    return get_bits_count(gb) >> 3;
}
1804 
1805 /**
1806  *@brief Decode a single WMA packet.
1807  *@param avctx codec context
1808  *@param data the output buffer
1809  *@param avpkt input packet
1810  *@return number of bytes that were read from the input buffer
1811  */
wmapro_decode_packet(AVCodecContext * avctx,AVFrame * frame,int * got_frame_ptr,AVPacket * avpkt)1812 static int wmapro_decode_packet(AVCodecContext *avctx, AVFrame *frame,
1813                                 int *got_frame_ptr, AVPacket *avpkt)
1814 {
1815     WMAProDecodeCtx *s = avctx->priv_data;
1816     int ret;
1817 
1818     /* get output buffer */
1819     frame->nb_samples = s->samples_per_frame;
1820     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
1821         s->packet_loss = 1;
1822         return 0;
1823     }
1824 
1825     return decode_packet(avctx, s, frame, got_frame_ptr, avpkt);
1826 }
1827 
/**
 *@brief Decode one XMA packet.
 * The packet is decoded by the current stream into that stream's private
 * frame and the decoded samples are queued in per-channel FIFOs. Once the
 * current stream has finished (or lost) its packet, the stream that owns
 * the next packet is selected and the samples that are available in the
 * FIFOs of all streams are copied to the output frame.
 *@param avctx codec context
 *@param frame output frame, receives the channels of all streams
 *@param got_frame_ptr set to 1 when samples were copied to frame, to 0 when
 *       an empty frame was produced; not touched when no output is attempted
 *@param avpkt input packet; an empty packet drains every stream
 *@return the result of the last decode_packet() call (0 if none was made),
 *        or a negative AVERROR code
 */
static int xma_decode_packet(AVCodecContext *avctx, AVFrame *frame,
                             int *got_frame_ptr, AVPacket *avpkt)
{
    XMADecodeCtx *s = avctx->priv_data;
    int got_stream_frame_ptr = 0;
    int i, ret = 0, eof = 0;

    /* make sure the current stream has a 512 sample frame buffer to decode
     * into; skip_samples is set to 64 whenever a buffer is (re)allocated */
    if (!s->frames[s->current_stream]->data[0]) {
        avctx->internal->skip_samples = 64;
        s->frames[s->current_stream]->nb_samples = 512;
        if ((ret = ff_get_buffer(avctx, s->frames[s->current_stream], 0)) < 0)
            return ret;
    } else if (s->frames[s->current_stream]->nb_samples != 512) {
        /* the frame no longer has 512 samples: release it and get a new one */
        avctx->internal->skip_samples = 64;
        av_frame_unref(s->frames[s->current_stream]);
        s->frames[s->current_stream]->nb_samples = 512;
        if ((ret = ff_get_buffer(avctx, s->frames[s->current_stream], 0)) < 0)
            return ret;
    }
    /* decode current stream packet */
    if (!s->xma[s->current_stream].eof_done) {
        ret = decode_packet(avctx, &s->xma[s->current_stream], s->frames[s->current_stream],
                            &got_stream_frame_ptr, avpkt);
    }

    /* an empty packet drains every stream that has a frame buffer and has
     * not reached its end yet */
    if (!avpkt->size) {
        eof = 1;

        for (i = 0; i < s->num_streams; i++) {
            if (!s->xma[i].eof_done && s->frames[i]->data[0]) {
                ret = decode_packet(avctx, &s->xma[i], s->frames[i],
                                    &got_stream_frame_ptr, avpkt);
            }

            /* eof stays set only if all streams are done */
            eof &= s->xma[i].eof_done;
        }
    }

    /* the trim values of the first stream are kept for the whole file */
    if (s->xma[0].trim_start)
        s->trim_start = s->xma[0].trim_start;
    if (s->xma[0].trim_end)
        s->trim_end = s->xma[0].trim_end;

    /* copy stream samples (1/2ch) to sample buffer (Nch) */
    if (got_stream_frame_ptr) {
        const int nb_samples = s->frames[s->current_stream]->nb_samples;
        void *left[1] = { s->frames[s->current_stream]->extended_data[0] };
        void *right[1] = { s->frames[s->current_stream]->extended_data[1] };

        av_audio_fifo_write(s->samples[0][s->current_stream], left, nb_samples);
        /* the second FIFO is only fed by streams with more than one channel */
        if (s->xma[s->current_stream].nb_channels > 1)
            av_audio_fifo_write(s->samples[1][s->current_stream], right, nb_samples);
    } else if (ret < 0) {
        /* decoding failed without producing a frame: restart at stream 0 */
        s->current_stream = 0;
        return ret;
    }

    /* find next XMA packet's owner stream, and update.
     * XMA streams find their packets following packet_skips
     * (at start there is one packet per stream, then interleave non-linearly). */
    if (s->xma[s->current_stream].packet_done ||
        s->xma[s->current_stream].packet_loss) {
        int nb_samples = INT_MAX;

        /* select stream with 0 skip_packets (= uses next packet) */
        if (s->xma[s->current_stream].skip_packets != 0) {
            /* min[0]: smallest skip_packets seen, min[1]: index of its stream */
            int min[2];

            min[0] = s->xma[0].skip_packets;
            min[1] = i = 0;

            for (i = 1; i < s->num_streams; i++) {
                if (s->xma[i].skip_packets < min[0]) {
                    min[0] = s->xma[i].skip_packets;
                    min[1] = i;
                }
            }

            s->current_stream = min[1];
        }

        /* all other streams skip next packet */
        for (i = 0; i < s->num_streams; i++) {
            s->xma[i].skip_packets = FFMAX(0, s->xma[i].skip_packets - 1);
            /* only as many samples as every stream's first FIFO holds can be output */
            nb_samples = FFMIN(nb_samples, av_audio_fifo_size(s->samples[0][i]));
        }

        /* while more input is expected, up to 4096 samples stay queued */
        if (!eof && avpkt->size)
            nb_samples -= FFMIN(nb_samples, 4096);

        /* copy samples from buffer to output if possible */
        if ((nb_samples > 0 || eof || !avpkt->size) && !s->flushed) {
            int bret;

            if (eof) {
                /* final output: shorten it by the trim values minus 128 + 64
                 * samples (clipped to the available range) and make sure
                 * nothing is output after it */
                nb_samples -= av_clip(s->trim_end + s->trim_start - 128 - 64, 0, nb_samples);
                s->flushed = 1;
            }

            frame->nb_samples = nb_samples;
            if ((bret = ff_get_buffer(avctx, frame, 0)) < 0)
                return bret;

            /* every stream fills its own channel(s), starting at start_channel */
            for (i = 0; i < s->num_streams; i++) {
                const int start_ch = s->start_channel[i];
                void *left[1] = { frame->extended_data[start_ch + 0] };

                av_audio_fifo_read(s->samples[0][i], left, nb_samples);
                if (s->xma[i].nb_channels > 1) {
                    void *right[1] = { frame->extended_data[start_ch + 1] };
                    av_audio_fifo_read(s->samples[1][i], right, nb_samples);
                }
            }

            *got_frame_ptr = nb_samples > 0;
        }
    }

    return ret;
}
1948 
xma_decode_init(AVCodecContext * avctx)1949 static av_cold int xma_decode_init(AVCodecContext *avctx)
1950 {
1951     XMADecodeCtx *s = avctx->priv_data;
1952     int i, ret, start_channels = 0;
1953 
1954     if (avctx->ch_layout.nb_channels <= 0 || avctx->extradata_size == 0)
1955         return AVERROR_INVALIDDATA;
1956 
1957     /* get stream config */
1958     if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size == 34) { /* XMA2WAVEFORMATEX */
1959         unsigned int channel_mask = AV_RL32(avctx->extradata + 2);
1960         if (channel_mask) {
1961             av_channel_layout_uninit(&avctx->ch_layout);
1962             av_channel_layout_from_mask(&avctx->ch_layout, channel_mask);
1963         } else
1964             avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
1965         s->num_streams = AV_RL16(avctx->extradata);
1966     } else if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size >= 2) { /* XMA2WAVEFORMAT */
1967         s->num_streams = avctx->extradata[1];
1968         if (avctx->extradata_size != (32 + ((avctx->extradata[0]==3)?0:8) + 4*s->num_streams)) {
1969             av_log(avctx, AV_LOG_ERROR, "Incorrect XMA2 extradata size\n");
1970             s->num_streams = 0;
1971             return AVERROR(EINVAL);
1972         }
1973     } else if (avctx->codec_id == AV_CODEC_ID_XMA1 && avctx->extradata_size >= 4) { /* XMAWAVEFORMAT */
1974         s->num_streams = avctx->extradata[4];
1975         if (avctx->extradata_size != (8 + 20*s->num_streams)) {
1976             av_log(avctx, AV_LOG_ERROR, "Incorrect XMA1 extradata size\n");
1977             s->num_streams = 0;
1978             return AVERROR(EINVAL);
1979         }
1980     } else {
1981         av_log(avctx, AV_LOG_ERROR, "Incorrect XMA config\n");
1982         return AVERROR(EINVAL);
1983     }
1984 
1985     /* encoder supports up to 64 streams / 64*2 channels (would have to alloc arrays) */
1986     if (avctx->ch_layout.nb_channels > XMA_MAX_CHANNELS || s->num_streams > XMA_MAX_STREAMS ||
1987         s->num_streams <= 0
1988     ) {
1989         avpriv_request_sample(avctx, "More than %d channels in %d streams", XMA_MAX_CHANNELS, s->num_streams);
1990         s->num_streams = 0;
1991         return AVERROR_PATCHWELCOME;
1992     }
1993 
1994     /* init all streams (several streams of 1/2ch make Nch files) */
1995     for (i = 0; i < s->num_streams; i++) {
1996         ret = decode_init(&s->xma[i], avctx, i);
1997         if (ret < 0)
1998             return ret;
1999         s->frames[i] = av_frame_alloc();
2000         if (!s->frames[i])
2001             return AVERROR(ENOMEM);
2002 
2003         s->start_channel[i] = start_channels;
2004         start_channels += s->xma[i].nb_channels;
2005     }
2006     if (start_channels != avctx->ch_layout.nb_channels)
2007         return AVERROR_INVALIDDATA;
2008 
2009     for (int i = 0; i < XMA_MAX_STREAMS; i++) {
2010         s->samples[0][i] = av_audio_fifo_alloc(avctx->sample_fmt, 1, 64 * 512);
2011         s->samples[1][i] = av_audio_fifo_alloc(avctx->sample_fmt, 1, 64 * 512);
2012         if (!s->samples[0][i] || !s->samples[1][i])
2013             return AVERROR(ENOMEM);
2014     }
2015 
2016     return ret;
2017 }
2018 
xma_decode_end(AVCodecContext * avctx)2019 static av_cold int xma_decode_end(AVCodecContext *avctx)
2020 {
2021     XMADecodeCtx *s = avctx->priv_data;
2022     int i;
2023 
2024     for (i = 0; i < s->num_streams; i++) {
2025         decode_end(&s->xma[i]);
2026         av_frame_free(&s->frames[i]);
2027     }
2028     s->num_streams = 0;
2029 
2030     for (i = 0; i < XMA_MAX_STREAMS; i++) {
2031         av_audio_fifo_free(s->samples[0][i]);
2032         av_audio_fifo_free(s->samples[1][i]);
2033     }
2034 
2035     return 0;
2036 }
2037 
/**
 *@brief Reset the state of one decoder context.
 * The first samples_per_frame samples of every channel's output buffer are
 * cleared because part of that buffer is used during the windowing of a new
 * frame. The context is marked as having lost its packet and the next
 * decoded frame is skipped.
 *@param s codec context
 */
static void flush(WMAProDecodeCtx *s)
{
    int ch = 0;

    while (ch < s->nb_channels) {
        memset(s->channel[ch].out, 0,
               s->samples_per_frame * sizeof(*s->channel[ch].out));
        ch++;
    }

    s->skip_frame   = 1;
    s->eof_done     = 0;
    s->skip_packets = 0;
    s->packet_loss  = 1;
}
2051 
2052 /**
2053  *@brief Clear decoder buffers (for seeking).
2054  *@param avctx codec context
2055  */
wmapro_flush(AVCodecContext * avctx)2056 static void wmapro_flush(AVCodecContext *avctx)
2057 {
2058     WMAProDecodeCtx *s = avctx->priv_data;
2059 
2060     flush(s);
2061 }
2062 
/**
 *@brief Flush callback of the XMA decoder.
 * Empties the sample FIFOs of every stream slot, resets the decoder state
 * of each stream in use and starts over at the first stream.
 *@param avctx codec context
 */
static void xma_flush(AVCodecContext *avctx)
{
    XMADecodeCtx *const ctx = avctx->priv_data;
    int idx;

    for (idx = 0; idx < XMA_MAX_STREAMS; idx++) {
        av_audio_fifo_reset(ctx->samples[0][idx]);
        av_audio_fifo_reset(ctx->samples[1][idx]);
    }

    idx = 0;
    while (idx < ctx->num_streams) {
        flush(&ctx->xma[idx]);
        idx++;
    }

    ctx->flushed        = 0;
    ctx->current_stream = 0;
}
2079 
2080 /**
2081  *@brief wmapro decoder
2082  */
2083 const FFCodec ff_wmapro_decoder = {
2084     .p.name         = "wmapro",
2085     .p.long_name    = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
2086     .p.type         = AVMEDIA_TYPE_AUDIO,
2087     .p.id           = AV_CODEC_ID_WMAPRO,
2088     .priv_data_size = sizeof(WMAProDecodeCtx),
2089     .init           = wmapro_decode_init,
2090     .close          = wmapro_decode_end,
2091     FF_CODEC_DECODE_CB(wmapro_decode_packet),
2092     .p.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
2093     .flush          = wmapro_flush,
2094     .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
2095                                                       AV_SAMPLE_FMT_NONE },
2096     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
2097 };
2098 
2099 const FFCodec ff_xma1_decoder = {
2100     .p.name         = "xma1",
2101     .p.long_name    = NULL_IF_CONFIG_SMALL("Xbox Media Audio 1"),
2102     .p.type         = AVMEDIA_TYPE_AUDIO,
2103     .p.id           = AV_CODEC_ID_XMA1,
2104     .priv_data_size = sizeof(XMADecodeCtx),
2105     .init           = xma_decode_init,
2106     .close          = xma_decode_end,
2107     FF_CODEC_DECODE_CB(xma_decode_packet),
2108     .p.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
2109     .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
2110                                                       AV_SAMPLE_FMT_NONE },
2111     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
2112 };
2113 
2114 const FFCodec ff_xma2_decoder = {
2115     .p.name         = "xma2",
2116     .p.long_name    = NULL_IF_CONFIG_SMALL("Xbox Media Audio 2"),
2117     .p.type         = AVMEDIA_TYPE_AUDIO,
2118     .p.id           = AV_CODEC_ID_XMA2,
2119     .priv_data_size = sizeof(XMADecodeCtx),
2120     .init           = xma_decode_init,
2121     .close          = xma_decode_end,
2122     FF_CODEC_DECODE_CB(xma_decode_packet),
2123     .flush          = xma_flush,
2124     .p.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
2125     .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
2126                                                       AV_SAMPLE_FMT_NONE },
2127     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
2128 };
2129