• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Wmapro compatible decoder
3  * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
4  * Copyright (c) 2008 - 2011 Sascha Sommer, Benjamin Larsson
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 /**
24  * @file
25  * @brief wmapro decoder implementation
26  * Wmapro is an MDCT based codec comparable to wma standard or AAC.
27  * The decoding therefore consists of the following steps:
28  * - bitstream decoding
29  * - reconstruction of per-channel data
30  * - rescaling and inverse quantization
31  * - IMDCT
32  * - windowing and overlap-add
33  *
34  * The compressed wmapro bitstream is split into individual packets.
35  * Every such packet contains one or more wma frames.
36  * The compressed frames may have a variable length and frames may
37  * cross packet boundaries.
38  * Common to all wmapro frames is the number of samples that are stored in
39  * a frame.
40  * The number of samples and a few other decode flags are stored
41  * as extradata that has to be passed to the decoder.
42  *
43  * The wmapro frames themselves are again split into a variable number of
44  * subframes. Every subframe contains the data for 2^N time domain samples
45  * where N varies between 7 and 12.
46  *
47  * Example wmapro bitstream (in samples):
48  *
49  * ||   packet 0           || packet 1 || packet 2      packets
50  * ---------------------------------------------------
51  * || frame 0      || frame 1       || frame 2    ||    frames
52  * ---------------------------------------------------
53  * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
54  * ---------------------------------------------------
55  * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
56  * ---------------------------------------------------
57  *
58  * The frame layouts for the individual channels of a wma frame do not need
59  * to be the same.
60  *
61  * However, if the offsets and lengths of several subframes of a frame are the
62  * same, the subframes of the channels can be grouped.
63  * Every group may then use special coding techniques like M/S stereo coding
64  * to improve the compression ratio. These channel transformations do not
65  * need to be applied to a whole subframe. Instead, they can also work on
66  * individual scale factor bands (see below).
67  * The coefficients that carry the audio signal in the frequency domain
68  * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
69  * In addition to that, the encoder can switch to a runlevel coding scheme
70  * by transmitting subframe_length / 128 zero coefficients.
71  *
72  * Before the audio signal can be converted to the time domain, the
73  * coefficients have to be rescaled and inverse quantized.
74  * A subframe is therefore split into several scale factor bands that get
75  * scaled individually.
76  * Scale factors are submitted for every frame but they might be shared
77  * between the subframes of a channel. Scale factors are initially DPCM-coded.
78  * Once scale factors are shared, the differences are transmitted as runlevel
79  * codes.
80  * Every subframe length and offset combination in the frame layout shares a
81  * common quantization factor that can be adjusted for every channel by a
82  * modifier.
83  * After the inverse quantization, the coefficients get processed by an IMDCT.
84  * The resulting values are then windowed with a sine window and the first half
85  * of the values are added to the second half of the output from the previous
86  * subframe in order to reconstruct the output samples.
87  */
88 
89 #include <inttypes.h>
90 
91 #include "libavutil/ffmath.h"
92 #include "libavutil/float_dsp.h"
93 #include "libavutil/intfloat.h"
94 #include "libavutil/intreadwrite.h"
95 #include "libavutil/mem_internal.h"
96 
97 #include "avcodec.h"
98 #include "internal.h"
99 #include "get_bits.h"
100 #include "put_bits.h"
101 #include "wmaprodata.h"
102 #include "sinewin.h"
103 #include "wma.h"
104 #include "wma_common.h"
105 
106 /** current decoder limitations */
107 #define WMAPRO_MAX_CHANNELS    8                             ///< max number of handled channels
108 #define MAX_SUBFRAMES  32                                    ///< max number of subframes per channel
109 #define MAX_BANDS      29                                    ///< max number of scale factor bands
110 #define MAX_FRAMESIZE  32768                                 ///< maximum compressed frame size
111 #define XMA_MAX_STREAMS         8
112 #define XMA_MAX_CHANNELS_STREAM 2
113 #define XMA_MAX_CHANNELS        (XMA_MAX_STREAMS * XMA_MAX_CHANNELS_STREAM)
114 
115 #define WMAPRO_BLOCK_MIN_BITS  6                                           ///< log2 of min block size
116 #define WMAPRO_BLOCK_MAX_BITS 13                                           ///< log2 of max block size
117 #define WMAPRO_BLOCK_MIN_SIZE (1 << WMAPRO_BLOCK_MIN_BITS)                 ///< minimum block size
118 #define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS)                 ///< maximum block size
119 #define WMAPRO_BLOCK_SIZES    (WMAPRO_BLOCK_MAX_BITS - WMAPRO_BLOCK_MIN_BITS + 1) ///< possible block sizes
120 
121 
122 #define VLCBITS            9
123 #define SCALEVLCBITS       8
124 #define VEC4MAXDEPTH    ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)
125 #define VEC2MAXDEPTH    ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)
126 #define VEC1MAXDEPTH    ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)
127 #define SCALEMAXDEPTH   ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS)
128 #define SCALERLMAXDEPTH ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)
129 
130 static VLC              sf_vlc;           ///< scale factor DPCM vlc
131 static VLC              sf_rl_vlc;        ///< scale factor run length vlc
132 static VLC              vec4_vlc;         ///< 4 coefficients per symbol
133 static VLC              vec2_vlc;         ///< 2 coefficients per symbol
134 static VLC              vec1_vlc;         ///< 1 coefficient per symbol
135 static VLC              coef_vlc[2];      ///< coefficient run length vlc codes
136 static float            sin64[33];        ///< sine table for decorrelation
137 
138 /**
139  * @brief frame specific decoder context for a single channel
140  */
141 typedef struct WMAProChannelCtx {
142     int16_t  prev_block_len;                          ///< length of the previous block
143     uint8_t  transmit_coefs;
144     uint8_t  num_subframes;
145     uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
146     uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe positions in the current frame
147     uint8_t  cur_subframe;                            ///< current subframe number
148     uint16_t decoded_samples;                         ///< number of already processed samples
149     uint8_t  grouped;                                 ///< channel is part of a group
150     int      quant_step;                              ///< quantization step for the current subframe
151     int8_t   reuse_sf;                                ///< share scale factors between subframes
152     int8_t   scale_factor_step;                       ///< scaling step for the current subframe
153     int      max_scale_factor;                        ///< maximum scale factor for the current subframe
154     int      saved_scale_factors[2][MAX_BANDS];       ///< resampled and (previously) transmitted scale factor values
155     int8_t   scale_factor_idx;                        ///< index for the transmitted scale factor values (used for resampling)
156     int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
157     uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
158     float*   coeffs;                                  ///< pointer to the subframe decode buffer
159     uint16_t num_vec_coeffs;                          ///< number of vector coded coefficients
160     DECLARE_ALIGNED(32, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
161 } WMAProChannelCtx;
162 
163 /**
164  * @brief channel group for channel transformations
165  */
166 typedef struct WMAProChannelGrp {
167     uint8_t num_channels;                                     ///< number of channels in the group
168     int8_t  transform;                                        ///< transform on / off
169     int8_t  transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band
170     float   decorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS];
171     float*  channel_data[WMAPRO_MAX_CHANNELS];                ///< transformation coefficients
172 } WMAProChannelGrp;
173 
174 /**
175  * @brief main decoder context
176  */
177 typedef struct WMAProDecodeCtx {
178     /* generic decoder variables */
179     AVCodecContext*  avctx;                         ///< codec context for av_log
180     AVFloatDSPContext *fdsp;
181     uint8_t          frame_data[MAX_FRAMESIZE +
182                       AV_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
183     PutBitContext    pb;                            ///< context for filling the frame_data buffer
184     FFTContext       mdct_ctx[WMAPRO_BLOCK_SIZES];  ///< MDCT context per block size
185     DECLARE_ALIGNED(32, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
186     const float*     windows[WMAPRO_BLOCK_SIZES];   ///< windows for the different block sizes
187 
188     /* frame size dependent frame information (set during initialization) */
189     uint32_t         decode_flags;                  ///< used compression features
190     uint8_t          len_prefix;                    ///< frame is prefixed with its length
191     uint8_t          dynamic_range_compression;     ///< frame contains DRC data
192     uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
193     uint16_t         samples_per_frame;             ///< number of samples to output
194     uint16_t         log2_frame_size;
195     int8_t           lfe_channel;                   ///< lfe channel index
196     uint8_t          max_num_subframes;
197     uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
198     uint8_t          max_subframe_len_bit;          ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
199     uint16_t         min_samples_per_subframe;
200     int8_t           num_sfb[WMAPRO_BLOCK_SIZES];   ///< scale factor bands per block size
201     int16_t          sfb_offsets[WMAPRO_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
202     int8_t           sf_offsets[WMAPRO_BLOCK_SIZES][WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
203     int16_t          subwoofer_cutoffs[WMAPRO_BLOCK_SIZES]; ///< subwoofer cutoff values
204 
205     /* packet decode state */
206     GetBitContext    pgb;                           ///< bitstream reader context for the packet
207     int              next_packet_start;             ///< start offset of the next wma packet in the demuxer packet
208     uint8_t          packet_offset;                 ///< frame offset in the packet
209     uint8_t          packet_sequence_number;        ///< current packet number
210     int              num_saved_bits;                ///< saved number of bits
211     int              frame_offset;                  ///< frame offset in the bit reservoir
212     int              subframe_offset;               ///< subframe offset in the bit reservoir
213     uint8_t          packet_loss;                   ///< set in case of bitstream error
214     uint8_t          packet_done;                   ///< set when a packet is fully decoded
215     uint8_t          eof_done;                      ///< set when EOF reached and extra subframe is written (XMA1/2)
216 
217     /* frame decode state */
218     uint32_t         frame_num;                     ///< current frame number (not used for decoding)
219     GetBitContext    gb;                            ///< bitstream reader context
220     int              buf_bit_size;                  ///< buffer size in bits
221     uint8_t          drc_gain;                      ///< gain for the DRC tool
222     int8_t           skip_frame;                    ///< skip output step
223     int8_t           parsed_all_subframes;          ///< all subframes decoded?
224     uint8_t          skip_packets;                  ///< packets to skip to find next packet in a stream (XMA1/2)
225 
226     /* subframe/block decode state */
227     int16_t          subframe_len;                  ///< current subframe length
228     int8_t           nb_channels;                   ///< number of channels in stream (XMA1/2)
229     int8_t           channels_for_cur_subframe;     ///< number of channels that contain the subframe
230     int8_t           channel_indexes_for_cur_subframe[WMAPRO_MAX_CHANNELS];
231     int8_t           num_bands;                     ///< number of scale factor bands
232     int8_t           transmit_num_vec_coeffs;       ///< number of vector coded coefficients is part of the bitstream
233     int16_t*         cur_sfb_offsets;               ///< sfb offsets for the current block
234     uint8_t          table_idx;                     ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
235     int8_t           esc_len;                       ///< length of escaped coefficients
236 
237     uint8_t          num_chgroups;                  ///< number of channel groups
238     WMAProChannelGrp chgroup[WMAPRO_MAX_CHANNELS];  ///< channel group information
239 
240     WMAProChannelCtx channel[WMAPRO_MAX_CHANNELS];  ///< per channel data
241 } WMAProDecodeCtx;
242 
243 typedef struct XMADecodeCtx {
244     WMAProDecodeCtx xma[XMA_MAX_STREAMS];
245     AVFrame *frames[XMA_MAX_STREAMS];
246     int current_stream;
247     int num_streams;
248     float samples[XMA_MAX_CHANNELS][512 * 64];
249     int offset[XMA_MAX_STREAMS];
250     int start_channel[XMA_MAX_STREAMS];
251 } XMADecodeCtx;
252 
253 /**
254  *@brief helper function to print the most important members of the context
255  *@param s context
256  */
dump_context(WMAProDecodeCtx * s)257 static av_cold void dump_context(WMAProDecodeCtx *s)
258 {
259 #define PRINT(a, b)     av_log(s->avctx, AV_LOG_DEBUG, " %s = %d\n", a, b);
260 #define PRINT_HEX(a, b) av_log(s->avctx, AV_LOG_DEBUG, " %s = %"PRIx32"\n", a, b);
261 
262     PRINT("ed sample bit depth", s->bits_per_sample);
263     PRINT_HEX("ed decode flags", s->decode_flags);
264     PRINT("samples per frame",   s->samples_per_frame);
265     PRINT("log2 frame size",     s->log2_frame_size);
266     PRINT("max num subframes",   s->max_num_subframes);
267     PRINT("len prefix",          s->len_prefix);
268     PRINT("num channels",        s->nb_channels);
269 }
270 
271 /**
272  *@brief Uninitialize the decoder and free all resources.
273  *@param avctx codec context
274  *@return 0 on success, < 0 otherwise
275  */
decode_end(WMAProDecodeCtx * s)276 static av_cold int decode_end(WMAProDecodeCtx *s)
277 {
278     int i;
279 
280     av_freep(&s->fdsp);
281 
282     for (i = 0; i < WMAPRO_BLOCK_SIZES; i++)
283         ff_mdct_end(&s->mdct_ctx[i]);
284 
285     return 0;
286 }
287 
wmapro_decode_end(AVCodecContext * avctx)288 static av_cold int wmapro_decode_end(AVCodecContext *avctx)
289 {
290     WMAProDecodeCtx *s = avctx->priv_data;
291 
292     decode_end(s);
293 
294     return 0;
295 }
296 
get_rate(AVCodecContext * avctx)297 static av_cold int get_rate(AVCodecContext *avctx)
298 {
299     if (avctx->codec_id != AV_CODEC_ID_WMAPRO) { // XXX: is this really only for XMA?
300         if (avctx->sample_rate > 44100)
301             return 48000;
302         else if (avctx->sample_rate > 32000)
303             return 44100;
304         else if (avctx->sample_rate > 24000)
305             return 32000;
306         return 24000;
307     }
308 
309     return avctx->sample_rate;
310 }
311 
312 /**
313  *@brief Initialize the decoder.
314  *@param avctx codec context
315  *@return 0 on success, -1 otherwise
316  */
decode_init(WMAProDecodeCtx * s,AVCodecContext * avctx,int num_stream)317 static av_cold int decode_init(WMAProDecodeCtx *s, AVCodecContext *avctx, int num_stream)
318 {
319     uint8_t *edata_ptr = avctx->extradata;
320     unsigned int channel_mask;
321     int i, bits;
322     int log2_max_num_subframes;
323     int num_possible_block_sizes;
324 
325     if (avctx->codec_id == AV_CODEC_ID_XMA1 || avctx->codec_id == AV_CODEC_ID_XMA2)
326         avctx->block_align = 2048;
327 
328     if (!avctx->block_align) {
329         av_log(avctx, AV_LOG_ERROR, "block_align is not set\n");
330         return AVERROR(EINVAL);
331     }
332 
333     s->avctx = avctx;
334 
335     init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
336 
337     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
338 
339     /** dump the extradata */
340     av_log(avctx, AV_LOG_DEBUG, "extradata:\n");
341     for (i = 0; i < avctx->extradata_size; i++)
342         av_log(avctx, AV_LOG_DEBUG, "[%x] ", avctx->extradata[i]);
343     av_log(avctx, AV_LOG_DEBUG, "\n");
344 
345     if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size == 34) { /* XMA2WAVEFORMATEX */
346         s->decode_flags    = 0x10d6;
347         s->bits_per_sample = 16;
348         channel_mask       = 0; //AV_RL32(edata_ptr+2); /* not always in expected order */
349         if ((num_stream+1) * XMA_MAX_CHANNELS_STREAM > avctx->channels) /* stream config is 2ch + 2ch + ... + 1/2ch */
350             s->nb_channels = 1;
351         else
352             s->nb_channels = 2;
353     } else if (avctx->codec_id == AV_CODEC_ID_XMA2) { /* XMA2WAVEFORMAT */
354         s->decode_flags    = 0x10d6;
355         s->bits_per_sample = 16;
356         channel_mask       = 0; /* would need to aggregate from all streams */
357         s->nb_channels = edata_ptr[32 + ((edata_ptr[0]==3)?0:8) + 4*num_stream + 0]; /* nth stream config */
358     } else if (avctx->codec_id == AV_CODEC_ID_XMA1) { /* XMAWAVEFORMAT */
359         s->decode_flags    = 0x10d6;
360         s->bits_per_sample = 16;
361         channel_mask       = 0; /* would need to aggregate from all streams */
362         s->nb_channels     = edata_ptr[8 + 20*num_stream + 17]; /* nth stream config */
363     } else if (avctx->codec_id == AV_CODEC_ID_WMAPRO && avctx->extradata_size >= 18) {
364         s->decode_flags    = AV_RL16(edata_ptr+14);
365         channel_mask       = AV_RL32(edata_ptr+2);
366         s->bits_per_sample = AV_RL16(edata_ptr);
367         s->nb_channels     = avctx->channels;
368 
369         if (s->bits_per_sample > 32 || s->bits_per_sample < 1) {
370             avpriv_request_sample(avctx, "bits per sample is %d", s->bits_per_sample);
371             return AVERROR_PATCHWELCOME;
372         }
373     } else {
374         avpriv_request_sample(avctx, "Unknown extradata size");
375         return AVERROR_PATCHWELCOME;
376     }
377 
378     /** generic init */
379     s->log2_frame_size = av_log2(avctx->block_align) + 4;
380     if (s->log2_frame_size > 25) {
381         avpriv_request_sample(avctx, "Large block align");
382         return AVERROR_PATCHWELCOME;
383     }
384 
385     /** frame info */
386     if (avctx->codec_id != AV_CODEC_ID_WMAPRO)
387         s->skip_frame = 0;
388     else
389         s->skip_frame = 1; /* skip first frame */
390 
391     s->packet_loss = 1;
392     s->len_prefix  = (s->decode_flags & 0x40);
393 
394     /** get frame len */
395     if (avctx->codec_id == AV_CODEC_ID_WMAPRO) {
396         bits = ff_wma_get_frame_len_bits(avctx->sample_rate, 3, s->decode_flags);
397         if (bits > WMAPRO_BLOCK_MAX_BITS) {
398             avpriv_request_sample(avctx, "14-bit block sizes");
399             return AVERROR_PATCHWELCOME;
400         }
401         s->samples_per_frame = 1 << bits;
402     } else {
403         s->samples_per_frame = 512;
404     }
405 
406     /** subframe info */
407     log2_max_num_subframes       = ((s->decode_flags & 0x38) >> 3);
408     s->max_num_subframes         = 1 << log2_max_num_subframes;
409     if (s->max_num_subframes == 16 || s->max_num_subframes == 4)
410         s->max_subframe_len_bit = 1;
411     s->subframe_len_bits = av_log2(log2_max_num_subframes) + 1;
412 
413     num_possible_block_sizes     = log2_max_num_subframes + 1;
414     s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
415     s->dynamic_range_compression = (s->decode_flags & 0x80);
416 
417     if (s->max_num_subframes > MAX_SUBFRAMES) {
418         av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %"PRId8"\n",
419                s->max_num_subframes);
420         return AVERROR_INVALIDDATA;
421     }
422 
423     if (s->min_samples_per_subframe < WMAPRO_BLOCK_MIN_SIZE) {
424         av_log(avctx, AV_LOG_ERROR, "min_samples_per_subframe of %d too small\n",
425                s->min_samples_per_subframe);
426         return AVERROR_INVALIDDATA;
427     }
428 
429     if (s->avctx->sample_rate <= 0) {
430         av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
431         return AVERROR_INVALIDDATA;
432     }
433 
434     if (s->nb_channels <= 0) {
435         av_log(avctx, AV_LOG_ERROR, "invalid number of channels %d\n",
436                s->nb_channels);
437         return AVERROR_INVALIDDATA;
438     } else if (avctx->codec_id != AV_CODEC_ID_WMAPRO && s->nb_channels > XMA_MAX_CHANNELS_STREAM) {
439         av_log(avctx, AV_LOG_ERROR, "invalid number of channels per XMA stream %d\n",
440                s->nb_channels);
441         return AVERROR_INVALIDDATA;
442     } else if (s->nb_channels > WMAPRO_MAX_CHANNELS || s->nb_channels > avctx->channels) {
443         avpriv_request_sample(avctx,
444                               "More than %d channels", WMAPRO_MAX_CHANNELS);
445         return AVERROR_PATCHWELCOME;
446     }
447 
448     /** init previous block len */
449     for (i = 0; i < s->nb_channels; i++)
450         s->channel[i].prev_block_len = s->samples_per_frame;
451 
452     /** extract lfe channel position */
453     s->lfe_channel = -1;
454 
455     if (channel_mask & 8) {
456         unsigned int mask;
457         for (mask = 1; mask < 16; mask <<= 1) {
458             if (channel_mask & mask)
459                 ++s->lfe_channel;
460         }
461     }
462 
463     INIT_VLC_STATIC(&sf_vlc, SCALEVLCBITS, HUFF_SCALE_SIZE,
464                     scale_huffbits, 1, 1,
465                     scale_huffcodes, 2, 2, 616);
466 
467     INIT_VLC_STATIC(&sf_rl_vlc, VLCBITS, HUFF_SCALE_RL_SIZE,
468                     scale_rl_huffbits, 1, 1,
469                     scale_rl_huffcodes, 4, 4, 1406);
470 
471     INIT_VLC_STATIC(&coef_vlc[0], VLCBITS, HUFF_COEF0_SIZE,
472                     coef0_huffbits, 1, 1,
473                     coef0_huffcodes, 4, 4, 2108);
474 
475     INIT_VLC_STATIC(&coef_vlc[1], VLCBITS, HUFF_COEF1_SIZE,
476                     coef1_huffbits, 1, 1,
477                     coef1_huffcodes, 4, 4, 3912);
478 
479     INIT_VLC_STATIC(&vec4_vlc, VLCBITS, HUFF_VEC4_SIZE,
480                     vec4_huffbits, 1, 1,
481                     vec4_huffcodes, 2, 2, 604);
482 
483     INIT_VLC_STATIC(&vec2_vlc, VLCBITS, HUFF_VEC2_SIZE,
484                     vec2_huffbits, 1, 1,
485                     vec2_huffcodes, 2, 2, 562);
486 
487     INIT_VLC_STATIC(&vec1_vlc, VLCBITS, HUFF_VEC1_SIZE,
488                     vec1_huffbits, 1, 1,
489                     vec1_huffcodes, 2, 2, 562);
490 
491     /** calculate number of scale factor bands and their offsets
492         for every possible block size */
493     for (i = 0; i < num_possible_block_sizes; i++) {
494         int subframe_len = s->samples_per_frame >> i;
495         int x;
496         int band = 1;
497         int rate = get_rate(avctx);
498 
499         s->sfb_offsets[i][0] = 0;
500 
501         for (x = 0; x < MAX_BANDS-1 && s->sfb_offsets[i][band - 1] < subframe_len; x++) {
502             int offset = (subframe_len * 2 * critical_freq[x]) / rate + 2;
503             offset &= ~3;
504             if (offset > s->sfb_offsets[i][band - 1])
505                 s->sfb_offsets[i][band++] = offset;
506 
507             if (offset >= subframe_len)
508                 break;
509         }
510         s->sfb_offsets[i][band - 1] = subframe_len;
511         s->num_sfb[i]               = band - 1;
512         if (s->num_sfb[i] <= 0) {
513             av_log(avctx, AV_LOG_ERROR, "num_sfb invalid\n");
514             return AVERROR_INVALIDDATA;
515         }
516     }
517 
518 
519     /** Scale factors can be shared between blocks of different size
520         as every block has a different scale factor band layout.
521         The matrix sf_offsets is needed to find the correct scale factor.
522      */
523 
524     for (i = 0; i < num_possible_block_sizes; i++) {
525         int b;
526         for (b = 0; b < s->num_sfb[i]; b++) {
527             int x;
528             int offset = ((s->sfb_offsets[i][b]
529                            + s->sfb_offsets[i][b + 1] - 1) << i) >> 1;
530             for (x = 0; x < num_possible_block_sizes; x++) {
531                 int v = 0;
532                 while (s->sfb_offsets[x][v + 1] << x < offset) {
533                     v++;
534                     av_assert0(v < MAX_BANDS);
535                 }
536                 s->sf_offsets[i][x][b] = v;
537             }
538         }
539     }
540 
541     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
542     if (!s->fdsp)
543         return AVERROR(ENOMEM);
544 
545     /** init MDCT, FIXME: only init needed sizes */
546     for (i = 0; i < WMAPRO_BLOCK_SIZES; i++)
547         ff_mdct_init(&s->mdct_ctx[i], WMAPRO_BLOCK_MIN_BITS+1+i, 1,
548                      1.0 / (1 << (WMAPRO_BLOCK_MIN_BITS + i - 1))
549                      / (1ll << (s->bits_per_sample - 1)));
550 
551     /** init MDCT windows: simple sine window */
552     for (i = 0; i < WMAPRO_BLOCK_SIZES; i++) {
553         const int win_idx = WMAPRO_BLOCK_MAX_BITS - i;
554         ff_init_ff_sine_windows(win_idx);
555         s->windows[WMAPRO_BLOCK_SIZES - i - 1] = ff_sine_windows[win_idx];
556     }
557 
558     /** calculate subwoofer cutoff values */
559     for (i = 0; i < num_possible_block_sizes; i++) {
560         int block_size = s->samples_per_frame >> i;
561         int cutoff = (440*block_size + 3LL * (s->avctx->sample_rate >> 1) - 1)
562                      / s->avctx->sample_rate;
563         s->subwoofer_cutoffs[i] = av_clip(cutoff, 4, block_size);
564     }
565 
566     /** calculate sine values for the decorrelation matrix */
567     for (i = 0; i < 33; i++)
568         sin64[i] = sin(i*M_PI / 64.0);
569 
570     if (avctx->debug & FF_DEBUG_BITSTREAM)
571         dump_context(s);
572 
573     avctx->channel_layout = channel_mask;
574 
575     return 0;
576 }
577 
578 /**
579  *@brief Initialize the decoder.
580  *@param avctx codec context
581  *@return 0 on success, -1 otherwise
582  */
wmapro_decode_init(AVCodecContext * avctx)583 static av_cold int wmapro_decode_init(AVCodecContext *avctx)
584 {
585     WMAProDecodeCtx *s = avctx->priv_data;
586 
587     return decode_init(s, avctx, 0);
588 }
589 
590 /**
591  *@brief Decode the subframe length.
592  *@param s context
593  *@param offset sample offset in the frame
594  *@return decoded subframe length on success, < 0 in case of an error
595  */
decode_subframe_length(WMAProDecodeCtx * s,int offset)596 static int decode_subframe_length(WMAProDecodeCtx *s, int offset)
597 {
598     int frame_len_shift = 0;
599     int subframe_len;
600 
601     /** no need to read from the bitstream when only one length is possible */
602     if (offset == s->samples_per_frame - s->min_samples_per_subframe)
603         return s->min_samples_per_subframe;
604 
605     if (get_bits_left(&s->gb) < 1)
606         return AVERROR_INVALIDDATA;
607 
608     /** 1 bit indicates if the subframe is of maximum length */
609     if (s->max_subframe_len_bit) {
610         if (get_bits1(&s->gb))
611             frame_len_shift = 1 + get_bits(&s->gb, s->subframe_len_bits-1);
612     } else
613         frame_len_shift = get_bits(&s->gb, s->subframe_len_bits);
614 
615     subframe_len = s->samples_per_frame >> frame_len_shift;
616 
617     /** sanity check the length */
618     if (subframe_len < s->min_samples_per_subframe ||
619         subframe_len > s->samples_per_frame) {
620         av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
621                subframe_len);
622         return AVERROR_INVALIDDATA;
623     }
624     return subframe_len;
625 }
626 
627 /**
628  *@brief Decode how the data in the frame is split into subframes.
629  *       Every WMA frame contains the encoded data for a fixed number of
630  *       samples per channel. The data for every channel might be split
631  *       into several subframes. This function will reconstruct the list of
632  *       subframes for every channel.
633  *
634  *       If the subframes are not evenly split, the algorithm estimates the
635  *       channels with the lowest number of total samples.
636  *       Afterwards, for each of these channels a bit is read from the
637  *       bitstream that indicates if the channel contains a subframe with the
638  *       next subframe size that is going to be read from the bitstream or not.
639  *       If a channel contains such a subframe, the subframe size gets added to
640  *       the channel's subframe list.
641  *       The algorithm repeats these steps until the frame is properly divided
642  *       between the individual channels.
643  *
644  *@param s context
645  *@return 0 on success, < 0 in case of an error
646  */
static int decode_tilehdr(WMAProDecodeCtx *s)
{
    /** samples already assigned to subframes, one total per channel */
    uint16_t assigned[WMAPRO_MAX_CHANNELS] = { 0 };
    /** per channel: does it take part in the subframe that is read next? */
    uint8_t  in_subframe[WMAPRO_MAX_CHANNELS];
    /** number of channels whose total currently equals shortest_len */
    int candidates   = s->nb_channels;
    /** smallest per-channel total; channels at this length are served first */
    int shortest_len = 0;
    /** nonzero when all channels share the same subframe offsets and sizes */
    int uniform_split;
    int ch;

    /* Should never consume more than 3073 bits (256 iterations for the
     * while loop when always the minimum amount of 128 samples is subtracted
     * from missing samples in the 8 channel case).
     * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
     */

    /** drop the tiling of the previous frame */
    for (ch = 0; ch < s->nb_channels; ch++)
        s->channel[ch].num_subframes = 0;

    /** the layout bit is only read when more than one subframe is allowed */
    uniform_split = s->max_num_subframes == 1 || get_bits1(&s->gb);

    /** keep reading subframes until every channel covers the whole frame */
    do {
        /** only one minimum-size subframe is left for the shortest channels */
        const int last_possible =
            shortest_len == s->samples_per_frame - s->min_samples_per_subframe;
        int subframe_len;

        /** work out which channels contain the next subframe; a bit is
            read per candidate channel unless membership is implied */
        for (ch = 0; ch < s->nb_channels; ch++) {
            if (assigned[ch] != shortest_len)
                in_subframe[ch] = 0;
            else if (uniform_split || candidates == 1 || last_possible)
                in_subframe[ch] = 1;
            else
                in_subframe[ch] = get_bits1(&s->gb);
        }

        /** a subframe length of zero (or an error) is rejected */
        subframe_len = decode_subframe_length(s, shortest_len);
        if (subframe_len <= 0)
            return AVERROR_INVALIDDATA;

        /** append the subframe to its channels and, from the channels that
            were left out, find the new shortest total and how many share it */
        shortest_len += subframe_len;
        for (ch = 0; ch < s->nb_channels; ch++) {
            WMAProChannelCtx *chan = &s->channel[ch];

            if (!in_subframe[ch]) {
                if (assigned[ch] > shortest_len)
                    continue;
                if (assigned[ch] < shortest_len) {
                    candidates   = 0;
                    shortest_len = assigned[ch];
                }
                ++candidates;
                continue;
            }

            if (chan->num_subframes >= MAX_SUBFRAMES) {
                av_log(s->avctx, AV_LOG_ERROR,
                       "broken frame: num subframes > 31\n");
                return AVERROR_INVALIDDATA;
            }
            chan->subframe_len[chan->num_subframes++] = subframe_len;
            assigned[ch] += subframe_len;
            if (assigned[ch] > s->samples_per_frame) {
                av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
                       "channel len > samples_per_frame\n");
                return AVERROR_INVALIDDATA;
            }
        }
    } while (shortest_len < s->samples_per_frame);

    /** turn the per-channel length lists into running start offsets */
    for (ch = 0; ch < s->nb_channels; ch++) {
        WMAProChannelCtx *chan = &s->channel[ch];
        int pos = 0;
        int sf;

        for (sf = 0; sf < chan->num_subframes; sf++) {
            ff_dlog(s->avctx, "frame[%"PRIu32"] channel[%i] subframe[%i]"
                    " len %i\n", s->frame_num, ch, sf,
                    chan->subframe_len[sf]);
            chan->subframe_offset[sf] = pos;
            pos += chan->subframe_len[sf];
        }
    }

    return 0;
}
732 
733 /**
734  *@brief Calculate a decorrelation matrix from the bitstream parameters.
735  *@param s codec context
736  *@param chgroup channel group for which the matrix needs to be calculated
737  */
decode_decorrelation_matrix(WMAProDecodeCtx * s,WMAProChannelGrp * chgroup)738 static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
739                                         WMAProChannelGrp *chgroup)
740 {
741     int i;
742     int offset = 0;
743     int8_t rotation_offset[WMAPRO_MAX_CHANNELS * WMAPRO_MAX_CHANNELS];
744     memset(chgroup->decorrelation_matrix, 0, s->nb_channels *
745            s->nb_channels * sizeof(*chgroup->decorrelation_matrix));
746 
747     for (i = 0; i < chgroup->num_channels * (chgroup->num_channels - 1) >> 1; i++)
748         rotation_offset[i] = get_bits(&s->gb, 6);
749 
750     for (i = 0; i < chgroup->num_channels; i++)
751         chgroup->decorrelation_matrix[chgroup->num_channels * i + i] =
752             get_bits1(&s->gb) ? 1.0 : -1.0;
753 
754     for (i = 1; i < chgroup->num_channels; i++) {
755         int x;
756         for (x = 0; x < i; x++) {
757             int y;
758             for (y = 0; y < i + 1; y++) {
759                 float v1 = chgroup->decorrelation_matrix[x * chgroup->num_channels + y];
760                 float v2 = chgroup->decorrelation_matrix[i * chgroup->num_channels + y];
761                 int n = rotation_offset[offset + x];
762                 float sinv;
763                 float cosv;
764 
765                 if (n < 32) {
766                     sinv = sin64[n];
767                     cosv = sin64[32 - n];
768                 } else {
769                     sinv =  sin64[64 -  n];
770                     cosv = -sin64[n  - 32];
771                 }
772 
773                 chgroup->decorrelation_matrix[y + x * chgroup->num_channels] =
774                                                (v1 * sinv) - (v2 * cosv);
775                 chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
776                                                (v1 * cosv) + (v2 * sinv);
777             }
778         }
779         offset += i;
780     }
781 }
782 
783 /**
784  *@brief Decode channel transformation parameters
785  *@param s codec context
786  *@return >= 0 in case of success, < 0 in case of bitstream errors
787  */
decode_channel_transform(WMAProDecodeCtx * s)788 static int decode_channel_transform(WMAProDecodeCtx* s)
789 {
790     int i;
791     /* should never consume more than 1921 bits for the 8 channel case
792      * 1 + MAX_CHANNELS * (MAX_CHANNELS + 2 + 3 * MAX_CHANNELS * MAX_CHANNELS
793      * + MAX_CHANNELS + MAX_BANDS + 1)
794      */
795 
796     /** in the one channel case channel transforms are pointless */
797     s->num_chgroups = 0;
798     if (s->nb_channels > 1) {
799         int remaining_channels = s->channels_for_cur_subframe;
800 
801         if (get_bits1(&s->gb)) {
802             avpriv_request_sample(s->avctx,
803                                   "Channel transform bit");
804             return AVERROR_PATCHWELCOME;
805         }
806 
807         for (s->num_chgroups = 0; remaining_channels &&
808              s->num_chgroups < s->channels_for_cur_subframe; s->num_chgroups++) {
809             WMAProChannelGrp* chgroup = &s->chgroup[s->num_chgroups];
810             float** channel_data = chgroup->channel_data;
811             chgroup->num_channels = 0;
812             chgroup->transform = 0;
813 
814             /** decode channel mask */
815             if (remaining_channels > 2) {
816                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
817                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
818                     if (!s->channel[channel_idx].grouped
819                         && get_bits1(&s->gb)) {
820                         ++chgroup->num_channels;
821                         s->channel[channel_idx].grouped = 1;
822                         *channel_data++ = s->channel[channel_idx].coeffs;
823                     }
824                 }
825             } else {
826                 chgroup->num_channels = remaining_channels;
827                 for (i = 0; i < s->channels_for_cur_subframe; i++) {
828                     int channel_idx = s->channel_indexes_for_cur_subframe[i];
829                     if (!s->channel[channel_idx].grouped)
830                         *channel_data++ = s->channel[channel_idx].coeffs;
831                     s->channel[channel_idx].grouped = 1;
832                 }
833             }
834 
835             /** decode transform type */
836             if (chgroup->num_channels == 2) {
837                 if (get_bits1(&s->gb)) {
838                     if (get_bits1(&s->gb)) {
839                         avpriv_request_sample(s->avctx,
840                                               "Unknown channel transform type");
841                         return AVERROR_PATCHWELCOME;
842                     }
843                 } else {
844                     chgroup->transform = 1;
845                     if (s->nb_channels == 2) {
846                         chgroup->decorrelation_matrix[0] =  1.0;
847                         chgroup->decorrelation_matrix[1] = -1.0;
848                         chgroup->decorrelation_matrix[2] =  1.0;
849                         chgroup->decorrelation_matrix[3] =  1.0;
850                     } else {
851                         /** cos(pi/4) */
852                         chgroup->decorrelation_matrix[0] =  0.70703125;
853                         chgroup->decorrelation_matrix[1] = -0.70703125;
854                         chgroup->decorrelation_matrix[2] =  0.70703125;
855                         chgroup->decorrelation_matrix[3] =  0.70703125;
856                     }
857                 }
858             } else if (chgroup->num_channels > 2) {
859                 if (get_bits1(&s->gb)) {
860                     chgroup->transform = 1;
861                     if (get_bits1(&s->gb)) {
862                         decode_decorrelation_matrix(s, chgroup);
863                     } else {
864                         /** FIXME: more than 6 coupled channels not supported */
865                         if (chgroup->num_channels > 6) {
866                             avpriv_request_sample(s->avctx,
867                                                   "Coupled channels > 6");
868                         } else {
869                             memcpy(chgroup->decorrelation_matrix,
870                                    default_decorrelation[chgroup->num_channels],
871                                    chgroup->num_channels * chgroup->num_channels *
872                                    sizeof(*chgroup->decorrelation_matrix));
873                         }
874                     }
875                 }
876             }
877 
878             /** decode transform on / off */
879             if (chgroup->transform) {
880                 if (!get_bits1(&s->gb)) {
881                     int i;
882                     /** transform can be enabled for individual bands */
883                     for (i = 0; i < s->num_bands; i++) {
884                         chgroup->transform_band[i] = get_bits1(&s->gb);
885                     }
886                 } else {
887                     memset(chgroup->transform_band, 1, s->num_bands);
888                 }
889             }
890             remaining_channels -= chgroup->num_channels;
891         }
892     }
893     return 0;
894 }
895 
896 /**
897  *@brief Extract the coefficients from the bitstream.
898  *@param s codec context
899  *@param c current channel number
900  *@return 0 on success, < 0 in case of bitstream errors
901  */
decode_coeffs(WMAProDecodeCtx * s,int c)902 static int decode_coeffs(WMAProDecodeCtx *s, int c)
903 {
904     /* Integers 0..15 as single-precision floats.  The table saves a
905        costly int to float conversion, and storing the values as
906        integers allows fast sign-flipping. */
907     static const uint32_t fval_tab[16] = {
908         0x00000000, 0x3f800000, 0x40000000, 0x40400000,
909         0x40800000, 0x40a00000, 0x40c00000, 0x40e00000,
910         0x41000000, 0x41100000, 0x41200000, 0x41300000,
911         0x41400000, 0x41500000, 0x41600000, 0x41700000,
912     };
913     int vlctable;
914     VLC* vlc;
915     WMAProChannelCtx* ci = &s->channel[c];
916     int rl_mode = 0;
917     int cur_coeff = 0;
918     int num_zeros = 0;
919     const uint16_t* run;
920     const float* level;
921 
922     ff_dlog(s->avctx, "decode coefficients for channel %i\n", c);
923 
924     vlctable = get_bits1(&s->gb);
925     vlc = &coef_vlc[vlctable];
926 
927     if (vlctable) {
928         run = coef1_run;
929         level = coef1_level;
930     } else {
931         run = coef0_run;
932         level = coef0_level;
933     }
934 
935     /** decode vector coefficients (consumes up to 167 bits per iteration for
936       4 vector coded large values) */
937     while ((s->transmit_num_vec_coeffs || !rl_mode) &&
938            (cur_coeff + 3 < ci->num_vec_coeffs)) {
939         uint32_t vals[4];
940         int i;
941         unsigned int idx;
942 
943         idx = get_vlc2(&s->gb, vec4_vlc.table, VLCBITS, VEC4MAXDEPTH);
944 
945         if (idx == HUFF_VEC4_SIZE - 1) {
946             for (i = 0; i < 4; i += 2) {
947                 idx = get_vlc2(&s->gb, vec2_vlc.table, VLCBITS, VEC2MAXDEPTH);
948                 if (idx == HUFF_VEC2_SIZE - 1) {
949                     uint32_t v0, v1;
950                     v0 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
951                     if (v0 == HUFF_VEC1_SIZE - 1)
952                         v0 += ff_wma_get_large_val(&s->gb);
953                     v1 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
954                     if (v1 == HUFF_VEC1_SIZE - 1)
955                         v1 += ff_wma_get_large_val(&s->gb);
956                     vals[i  ] = av_float2int(v0);
957                     vals[i+1] = av_float2int(v1);
958                 } else {
959                     vals[i]   = fval_tab[symbol_to_vec2[idx] >> 4 ];
960                     vals[i+1] = fval_tab[symbol_to_vec2[idx] & 0xF];
961                 }
962             }
963         } else {
964             vals[0] = fval_tab[ symbol_to_vec4[idx] >> 12      ];
965             vals[1] = fval_tab[(symbol_to_vec4[idx] >> 8) & 0xF];
966             vals[2] = fval_tab[(symbol_to_vec4[idx] >> 4) & 0xF];
967             vals[3] = fval_tab[ symbol_to_vec4[idx]       & 0xF];
968         }
969 
970         /** decode sign */
971         for (i = 0; i < 4; i++) {
972             if (vals[i]) {
973                 uint32_t sign = get_bits1(&s->gb) - 1;
974                 AV_WN32A(&ci->coeffs[cur_coeff], vals[i] ^ sign << 31);
975                 num_zeros = 0;
976             } else {
977                 ci->coeffs[cur_coeff] = 0;
978                 /** switch to run level mode when subframe_len / 128 zeros
979                     were found in a row */
980                 rl_mode |= (++num_zeros > s->subframe_len >> 8);
981             }
982             ++cur_coeff;
983         }
984     }
985 
986     /** decode run level coded coefficients */
987     if (cur_coeff < s->subframe_len) {
988         int ret;
989 
990         memset(&ci->coeffs[cur_coeff], 0,
991                sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff));
992         ret = ff_wma_run_level_decode(s->avctx, &s->gb, vlc,
993                                       level, run, 1, ci->coeffs,
994                                       cur_coeff, s->subframe_len,
995                                       s->subframe_len, s->esc_len, 0);
996         if (ret < 0)
997             return ret;
998     }
999 
1000     return 0;
1001 }
1002 
1003 /**
1004  *@brief Extract scale factors from the bitstream.
1005  *@param s codec context
1006  *@return 0 on success, < 0 in case of bitstream errors
1007  */
decode_scale_factors(WMAProDecodeCtx * s)1008 static int decode_scale_factors(WMAProDecodeCtx* s)
1009 {
1010     int i;
1011 
1012     /** should never consume more than 5344 bits
1013      *  MAX_CHANNELS * (1 +  MAX_BANDS * 23)
1014      */
1015 
1016     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1017         int c = s->channel_indexes_for_cur_subframe[i];
1018         int* sf;
1019         int* sf_end;
1020         s->channel[c].scale_factors = s->channel[c].saved_scale_factors[!s->channel[c].scale_factor_idx];
1021         sf_end = s->channel[c].scale_factors + s->num_bands;
1022 
1023         /** resample scale factors for the new block size
1024          *  as the scale factors might need to be resampled several times
1025          *  before some  new values are transmitted, a backup of the last
1026          *  transmitted scale factors is kept in saved_scale_factors
1027          */
1028         if (s->channel[c].reuse_sf) {
1029             const int8_t* sf_offsets = s->sf_offsets[s->table_idx][s->channel[c].table_idx];
1030             int b;
1031             for (b = 0; b < s->num_bands; b++)
1032                 s->channel[c].scale_factors[b] =
1033                     s->channel[c].saved_scale_factors[s->channel[c].scale_factor_idx][*sf_offsets++];
1034         }
1035 
1036         if (!s->channel[c].cur_subframe || get_bits1(&s->gb)) {
1037 
1038             if (!s->channel[c].reuse_sf) {
1039                 int val;
1040                 /** decode DPCM coded scale factors */
1041                 s->channel[c].scale_factor_step = get_bits(&s->gb, 2) + 1;
1042                 val = 45 / s->channel[c].scale_factor_step;
1043                 for (sf = s->channel[c].scale_factors; sf < sf_end; sf++) {
1044                     val += get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS, SCALEMAXDEPTH) - 60;
1045                     *sf = val;
1046                 }
1047             } else {
1048                 int i;
1049                 /** run level decode differences to the resampled factors */
1050                 for (i = 0; i < s->num_bands; i++) {
1051                     int idx;
1052                     int skip;
1053                     int val;
1054                     int sign;
1055 
1056                     idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS, SCALERLMAXDEPTH);
1057 
1058                     if (!idx) {
1059                         uint32_t code = get_bits(&s->gb, 14);
1060                         val  =  code >> 6;
1061                         sign = (code & 1) - 1;
1062                         skip = (code & 0x3f) >> 1;
1063                     } else if (idx == 1) {
1064                         break;
1065                     } else {
1066                         skip = scale_rl_run[idx];
1067                         val  = scale_rl_level[idx];
1068                         sign = get_bits1(&s->gb)-1;
1069                     }
1070 
1071                     i += skip;
1072                     if (i >= s->num_bands) {
1073                         av_log(s->avctx, AV_LOG_ERROR,
1074                                "invalid scale factor coding\n");
1075                         return AVERROR_INVALIDDATA;
1076                     }
1077                     s->channel[c].scale_factors[i] += (val ^ sign) - sign;
1078                 }
1079             }
1080             /** swap buffers */
1081             s->channel[c].scale_factor_idx = !s->channel[c].scale_factor_idx;
1082             s->channel[c].table_idx = s->table_idx;
1083             s->channel[c].reuse_sf  = 1;
1084         }
1085 
1086         /** calculate new scale factor maximum */
1087         s->channel[c].max_scale_factor = s->channel[c].scale_factors[0];
1088         for (sf = s->channel[c].scale_factors + 1; sf < sf_end; sf++) {
1089             s->channel[c].max_scale_factor =
1090                 FFMAX(s->channel[c].max_scale_factor, *sf);
1091         }
1092 
1093     }
1094     return 0;
1095 }
1096 
1097 /**
1098  *@brief Reconstruct the individual channel data.
1099  *@param s codec context
1100  */
inverse_channel_transform(WMAProDecodeCtx * s)1101 static void inverse_channel_transform(WMAProDecodeCtx *s)
1102 {
1103     int i;
1104 
1105     for (i = 0; i < s->num_chgroups; i++) {
1106         if (s->chgroup[i].transform) {
1107             float data[WMAPRO_MAX_CHANNELS];
1108             const int num_channels = s->chgroup[i].num_channels;
1109             float** ch_data = s->chgroup[i].channel_data;
1110             float** ch_end = ch_data + num_channels;
1111             const int8_t* tb = s->chgroup[i].transform_band;
1112             int16_t* sfb;
1113 
1114             /** multichannel decorrelation */
1115             for (sfb = s->cur_sfb_offsets;
1116                  sfb < s->cur_sfb_offsets + s->num_bands; sfb++) {
1117                 int y;
1118                 if (*tb++ == 1) {
1119                     /** multiply values with the decorrelation_matrix */
1120                     for (y = sfb[0]; y < FFMIN(sfb[1], s->subframe_len); y++) {
1121                         const float* mat = s->chgroup[i].decorrelation_matrix;
1122                         const float* data_end = data + num_channels;
1123                         float* data_ptr = data;
1124                         float** ch;
1125 
1126                         for (ch = ch_data; ch < ch_end; ch++)
1127                             *data_ptr++ = (*ch)[y];
1128 
1129                         for (ch = ch_data; ch < ch_end; ch++) {
1130                             float sum = 0;
1131                             data_ptr = data;
1132                             while (data_ptr < data_end)
1133                                 sum += *data_ptr++ * *mat++;
1134 
1135                             (*ch)[y] = sum;
1136                         }
1137                     }
1138                 } else if (s->nb_channels == 2) {
1139                     int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
1140                     s->fdsp->vector_fmul_scalar(ch_data[0] + sfb[0],
1141                                                ch_data[0] + sfb[0],
1142                                                181.0 / 128, len);
1143                     s->fdsp->vector_fmul_scalar(ch_data[1] + sfb[0],
1144                                                ch_data[1] + sfb[0],
1145                                                181.0 / 128, len);
1146                 }
1147             }
1148         }
1149     }
1150 }
1151 
1152 /**
1153  *@brief Apply sine window and reconstruct the output buffer.
1154  *@param s codec context
1155  */
wmapro_window(WMAProDecodeCtx * s)1156 static void wmapro_window(WMAProDecodeCtx *s)
1157 {
1158     int i;
1159     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1160         int c = s->channel_indexes_for_cur_subframe[i];
1161         const float* window;
1162         int winlen = s->channel[c].prev_block_len;
1163         float* start = s->channel[c].coeffs - (winlen >> 1);
1164 
1165         if (s->subframe_len < winlen) {
1166             start += (winlen - s->subframe_len) >> 1;
1167             winlen = s->subframe_len;
1168         }
1169 
1170         window = s->windows[av_log2(winlen) - WMAPRO_BLOCK_MIN_BITS];
1171 
1172         winlen >>= 1;
1173 
1174         s->fdsp->vector_fmul_window(start, start, start + winlen,
1175                                    window, winlen);
1176 
1177         s->channel[c].prev_block_len = s->subframe_len;
1178     }
1179 }
1180 
1181 /**
1182  *@brief Decode a single subframe (block).
1183  *@param s codec context
1184  *@return 0 on success, < 0 when decoding failed
1185  */
decode_subframe(WMAProDecodeCtx * s)1186 static int decode_subframe(WMAProDecodeCtx *s)
1187 {
1188     int offset = s->samples_per_frame;
1189     int subframe_len = s->samples_per_frame;
1190     int i;
1191     int total_samples   = s->samples_per_frame * s->nb_channels;
1192     int transmit_coeffs = 0;
1193     int cur_subwoofer_cutoff;
1194 
1195     s->subframe_offset = get_bits_count(&s->gb);
1196 
1197     /** reset channel context and find the next block offset and size
1198         == the next block of the channel with the smallest number of
1199         decoded samples
1200     */
1201     for (i = 0; i < s->nb_channels; i++) {
1202         s->channel[i].grouped = 0;
1203         if (offset > s->channel[i].decoded_samples) {
1204             offset = s->channel[i].decoded_samples;
1205             subframe_len =
1206                 s->channel[i].subframe_len[s->channel[i].cur_subframe];
1207         }
1208     }
1209 
1210     ff_dlog(s->avctx,
1211             "processing subframe with offset %i len %i\n", offset, subframe_len);
1212 
1213     /** get a list of all channels that contain the estimated block */
1214     s->channels_for_cur_subframe = 0;
1215     for (i = 0; i < s->nb_channels; i++) {
1216         const int cur_subframe = s->channel[i].cur_subframe;
1217         /** subtract already processed samples */
1218         total_samples -= s->channel[i].decoded_samples;
1219 
1220         /** and count if there are multiple subframes that match our profile */
1221         if (offset == s->channel[i].decoded_samples &&
1222             subframe_len == s->channel[i].subframe_len[cur_subframe]) {
1223             total_samples -= s->channel[i].subframe_len[cur_subframe];
1224             s->channel[i].decoded_samples +=
1225                 s->channel[i].subframe_len[cur_subframe];
1226             s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
1227             ++s->channels_for_cur_subframe;
1228         }
1229     }
1230 
1231     /** check if the frame will be complete after processing the
1232         estimated block */
1233     if (!total_samples)
1234         s->parsed_all_subframes = 1;
1235 
1236 
1237     ff_dlog(s->avctx, "subframe is part of %i channels\n",
1238             s->channels_for_cur_subframe);
1239 
1240     /** calculate number of scale factor bands and their offsets */
1241     s->table_idx         = av_log2(s->samples_per_frame/subframe_len);
1242     s->num_bands         = s->num_sfb[s->table_idx];
1243     s->cur_sfb_offsets   = s->sfb_offsets[s->table_idx];
1244     cur_subwoofer_cutoff = s->subwoofer_cutoffs[s->table_idx];
1245 
1246     /** configure the decoder for the current subframe */
1247     offset += s->samples_per_frame >> 1;
1248 
1249     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1250         int c = s->channel_indexes_for_cur_subframe[i];
1251 
1252         s->channel[c].coeffs = &s->channel[c].out[offset];
1253     }
1254 
1255     s->subframe_len = subframe_len;
1256     s->esc_len = av_log2(s->subframe_len - 1) + 1;
1257 
1258     /** skip extended header if any */
1259     if (get_bits1(&s->gb)) {
1260         int num_fill_bits;
1261         if (!(num_fill_bits = get_bits(&s->gb, 2))) {
1262             int len = get_bits(&s->gb, 4);
1263             num_fill_bits = get_bitsz(&s->gb, len) + 1;
1264         }
1265 
1266         if (num_fill_bits >= 0) {
1267             if (get_bits_count(&s->gb) + num_fill_bits > s->num_saved_bits) {
1268                 av_log(s->avctx, AV_LOG_ERROR, "invalid number of fill bits\n");
1269                 return AVERROR_INVALIDDATA;
1270             }
1271 
1272             skip_bits_long(&s->gb, num_fill_bits);
1273         }
1274     }
1275 
1276     /** no idea for what the following bit is used */
1277     if (get_bits1(&s->gb)) {
1278         avpriv_request_sample(s->avctx, "Reserved bit");
1279         return AVERROR_PATCHWELCOME;
1280     }
1281 
1282 
1283     if (decode_channel_transform(s) < 0)
1284         return AVERROR_INVALIDDATA;
1285 
1286 
1287     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1288         int c = s->channel_indexes_for_cur_subframe[i];
1289         if ((s->channel[c].transmit_coefs = get_bits1(&s->gb)))
1290             transmit_coeffs = 1;
1291     }
1292 
1293     av_assert0(s->subframe_len <= WMAPRO_BLOCK_MAX_SIZE);
1294     if (transmit_coeffs) {
1295         int step;
1296         int quant_step = 90 * s->bits_per_sample >> 4;
1297 
1298         /** decode number of vector coded coefficients */
1299         if ((s->transmit_num_vec_coeffs = get_bits1(&s->gb))) {
1300             int num_bits = av_log2((s->subframe_len + 3)/4) + 1;
1301             for (i = 0; i < s->channels_for_cur_subframe; i++) {
1302                 int c = s->channel_indexes_for_cur_subframe[i];
1303                 int num_vec_coeffs = get_bits(&s->gb, num_bits) << 2;
1304                 if (num_vec_coeffs > s->subframe_len) {
1305                     av_log(s->avctx, AV_LOG_ERROR, "num_vec_coeffs %d is too large\n", num_vec_coeffs);
1306                     return AVERROR_INVALIDDATA;
1307                 }
1308                 av_assert0(num_vec_coeffs + offset <= FF_ARRAY_ELEMS(s->channel[c].out));
1309                 s->channel[c].num_vec_coeffs = num_vec_coeffs;
1310             }
1311         } else {
1312             for (i = 0; i < s->channels_for_cur_subframe; i++) {
1313                 int c = s->channel_indexes_for_cur_subframe[i];
1314                 s->channel[c].num_vec_coeffs = s->subframe_len;
1315             }
1316         }
1317         /** decode quantization step */
1318         step = get_sbits(&s->gb, 6);
1319         quant_step += step;
1320         if (step == -32 || step == 31) {
1321             const int sign = (step == 31) - 1;
1322             int quant = 0;
1323             while (get_bits_count(&s->gb) + 5 < s->num_saved_bits &&
1324                    (step = get_bits(&s->gb, 5)) == 31) {
1325                 quant += 31;
1326             }
1327             quant_step += ((quant + step) ^ sign) - sign;
1328         }
1329         if (quant_step < 0) {
1330             av_log(s->avctx, AV_LOG_DEBUG, "negative quant step\n");
1331         }
1332 
1333         /** decode quantization step modifiers for every channel */
1334 
1335         if (s->channels_for_cur_subframe == 1) {
1336             s->channel[s->channel_indexes_for_cur_subframe[0]].quant_step = quant_step;
1337         } else {
1338             int modifier_len = get_bits(&s->gb, 3);
1339             for (i = 0; i < s->channels_for_cur_subframe; i++) {
1340                 int c = s->channel_indexes_for_cur_subframe[i];
1341                 s->channel[c].quant_step = quant_step;
1342                 if (get_bits1(&s->gb)) {
1343                     if (modifier_len) {
1344                         s->channel[c].quant_step += get_bits(&s->gb, modifier_len) + 1;
1345                     } else
1346                         ++s->channel[c].quant_step;
1347                 }
1348             }
1349         }
1350 
1351         /** decode scale factors */
1352         if (decode_scale_factors(s) < 0)
1353             return AVERROR_INVALIDDATA;
1354     }
1355 
1356     ff_dlog(s->avctx, "BITSTREAM: subframe header length was %i\n",
1357             get_bits_count(&s->gb) - s->subframe_offset);
1358 
1359     /** parse coefficients */
1360     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1361         int c = s->channel_indexes_for_cur_subframe[i];
1362         if (s->channel[c].transmit_coefs &&
1363             get_bits_count(&s->gb) < s->num_saved_bits) {
1364             decode_coeffs(s, c);
1365         } else
1366             memset(s->channel[c].coeffs, 0,
1367                    sizeof(*s->channel[c].coeffs) * subframe_len);
1368     }
1369 
1370     ff_dlog(s->avctx, "BITSTREAM: subframe length was %i\n",
1371             get_bits_count(&s->gb) - s->subframe_offset);
1372 
1373     if (transmit_coeffs) {
1374         FFTContext *mdct = &s->mdct_ctx[av_log2(subframe_len) - WMAPRO_BLOCK_MIN_BITS];
1375         /** reconstruct the per channel data */
1376         inverse_channel_transform(s);
1377         for (i = 0; i < s->channels_for_cur_subframe; i++) {
1378             int c = s->channel_indexes_for_cur_subframe[i];
1379             const int* sf = s->channel[c].scale_factors;
1380             int b;
1381 
1382             if (c == s->lfe_channel)
1383                 memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
1384                        (subframe_len - cur_subwoofer_cutoff));
1385 
1386             /** inverse quantization and rescaling */
1387             for (b = 0; b < s->num_bands; b++) {
1388                 const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
1389                 const int exp = s->channel[c].quant_step -
1390                             (s->channel[c].max_scale_factor - *sf++) *
1391                             s->channel[c].scale_factor_step;
1392                 const float quant = ff_exp10(exp / 20.0);
1393                 int start = s->cur_sfb_offsets[b];
1394                 s->fdsp->vector_fmul_scalar(s->tmp + start,
1395                                            s->channel[c].coeffs + start,
1396                                            quant, end - start);
1397             }
1398 
1399             /** apply imdct (imdct_half == DCTIV with reverse) */
1400             mdct->imdct_half(mdct, s->channel[c].coeffs, s->tmp);
1401         }
1402     }
1403 
1404     /** window and overlapp-add */
1405     wmapro_window(s);
1406 
1407     /** handled one subframe */
1408     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1409         int c = s->channel_indexes_for_cur_subframe[i];
1410         if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1411             av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
1412             return AVERROR_INVALIDDATA;
1413         }
1414         ++s->channel[c].cur_subframe;
1415     }
1416 
1417     return 0;
1418 }
1419 
1420 /**
1421  *@brief Decode one WMA frame.
1422  *@param s codec context
1423  *@return 0 if the trailer bit indicates that this is the last frame,
1424  *        1 if there are additional frames
1425  */
decode_frame(WMAProDecodeCtx * s,AVFrame * frame,int * got_frame_ptr)1426 static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr)
1427 {
1428     GetBitContext* gb = &s->gb;
1429     int more_frames = 0;
1430     int len = 0;
1431     int i;
1432 
1433     /** get frame length */
1434     if (s->len_prefix)
1435         len = get_bits(gb, s->log2_frame_size);
1436 
1437     ff_dlog(s->avctx, "decoding frame with length %x\n", len);
1438 
1439     /** decode tile information */
1440     if (decode_tilehdr(s)) {
1441         s->packet_loss = 1;
1442         return 0;
1443     }
1444 
1445     /** read postproc transform */
1446     if (s->nb_channels > 1 && get_bits1(gb)) {
1447         if (get_bits1(gb)) {
1448             for (i = 0; i < s->nb_channels * s->nb_channels; i++)
1449                 skip_bits(gb, 4);
1450         }
1451     }
1452 
1453     /** read drc info */
1454     if (s->dynamic_range_compression) {
1455         s->drc_gain = get_bits(gb, 8);
1456         ff_dlog(s->avctx, "drc_gain %i\n", s->drc_gain);
1457     }
1458 
1459     /** no idea what these are for, might be the number of samples
1460         that need to be skipped at the beginning or end of a stream */
1461     if (get_bits1(gb)) {
1462         int av_unused skip;
1463 
1464         /** usually true for the first frame */
1465         if (get_bits1(gb)) {
1466             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1467             ff_dlog(s->avctx, "start skip: %i\n", skip);
1468         }
1469 
1470         /** sometimes true for the last frame */
1471         if (get_bits1(gb)) {
1472             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1473             ff_dlog(s->avctx, "end skip: %i\n", skip);
1474         }
1475 
1476     }
1477 
1478     ff_dlog(s->avctx, "BITSTREAM: frame header length was %i\n",
1479             get_bits_count(gb) - s->frame_offset);
1480 
1481     /** reset subframe states */
1482     s->parsed_all_subframes = 0;
1483     for (i = 0; i < s->nb_channels; i++) {
1484         s->channel[i].decoded_samples = 0;
1485         s->channel[i].cur_subframe    = 0;
1486         s->channel[i].reuse_sf        = 0;
1487     }
1488 
1489     /** decode all subframes */
1490     while (!s->parsed_all_subframes) {
1491         if (decode_subframe(s) < 0) {
1492             s->packet_loss = 1;
1493             return 0;
1494         }
1495     }
1496 
1497     /** copy samples to the output buffer */
1498     for (i = 0; i < s->nb_channels; i++)
1499         memcpy(frame->extended_data[i], s->channel[i].out,
1500                s->samples_per_frame * sizeof(*s->channel[i].out));
1501 
1502     for (i = 0; i < s->nb_channels; i++) {
1503         /** reuse second half of the IMDCT output for the next frame */
1504         memcpy(&s->channel[i].out[0],
1505                &s->channel[i].out[s->samples_per_frame],
1506                s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
1507     }
1508 
1509     if (s->skip_frame) {
1510         s->skip_frame = 0;
1511         *got_frame_ptr = 0;
1512         av_frame_unref(frame);
1513     } else {
1514         *got_frame_ptr = 1;
1515     }
1516 
1517     if (s->len_prefix) {
1518         if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1519             /** FIXME: not sure if this is always an error */
1520             av_log(s->avctx, AV_LOG_ERROR,
1521                    "frame[%"PRIu32"] would have to skip %i bits\n",
1522                    s->frame_num,
1523                    len - (get_bits_count(gb) - s->frame_offset) - 1);
1524             s->packet_loss = 1;
1525             return 0;
1526         }
1527 
1528         /** skip the rest of the frame data */
1529         skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1530     } else {
1531         while (get_bits_count(gb) < s->num_saved_bits && get_bits1(gb) == 0) {
1532         }
1533     }
1534 
1535     /** decode trailer bit */
1536     more_frames = get_bits1(gb);
1537 
1538     ++s->frame_num;
1539     return more_frames;
1540 }
1541 
1542 /**
1543  *@brief Calculate remaining input buffer length.
1544  *@param s codec context
1545  *@param gb bitstream reader context
1546  *@return remaining size in bits
1547  */
remaining_bits(WMAProDecodeCtx * s,GetBitContext * gb)1548 static int remaining_bits(WMAProDecodeCtx *s, GetBitContext *gb)
1549 {
1550     return s->buf_bit_size - get_bits_count(gb);
1551 }
1552 
1553 /**
1554  *@brief Fill the bit reservoir with a (partial) frame.
1555  *@param s codec context
1556  *@param gb bitstream reader context
1557  *@param len length of the partial frame
1558  *@param append decides whether to reset the buffer or not
1559  */
save_bits(WMAProDecodeCtx * s,GetBitContext * gb,int len,int append)1560 static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
1561                       int append)
1562 {
1563     int buflen;
1564 
1565     /** when the frame data does not need to be concatenated, the input buffer
1566         is reset and additional bits from the previous frame are copied
1567         and skipped later so that a fast byte copy is possible */
1568 
1569     if (!append) {
1570         s->frame_offset = get_bits_count(gb) & 7;
1571         s->num_saved_bits = s->frame_offset;
1572         init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
1573         buflen = (s->num_saved_bits      + len + 7) >> 3;
1574     } else
1575         buflen = (put_bits_count(&s->pb) + len + 7) >> 3;
1576 
1577     if (len <= 0 || buflen > MAX_FRAMESIZE) {
1578         avpriv_request_sample(s->avctx, "Too small input buffer");
1579         s->packet_loss = 1;
1580         return;
1581     }
1582 
1583     av_assert0(len <= put_bits_left(&s->pb));
1584 
1585     s->num_saved_bits += len;
1586     if (!append) {
1587         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
1588                      s->num_saved_bits);
1589     } else {
1590         int align = 8 - (get_bits_count(gb) & 7);
1591         align = FFMIN(align, len);
1592         put_bits(&s->pb, align, get_bits(gb, align));
1593         len -= align;
1594         ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
1595     }
1596     skip_bits_long(gb, len);
1597 
1598     {
1599         PutBitContext tmp = s->pb;
1600         flush_put_bits(&tmp);
1601     }
1602 
1603     init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1604     skip_bits(&s->gb, s->frame_offset);
1605 }
1606 
/**
 *@brief Decode (part of) one packet for a single WMAPro/XMA stream.
 *
 * Three situations are handled:
 * - empty packet: output the samples that are still buffered, once
 * - previous packet finished or lost: parse a new packet header and complete
 *   the frame that crossed the packet boundary
 * - otherwise: continue decoding frames from the current packet
 *
 *@param avctx codec context
 *@param s decoder context of the stream
 *@param data output AVFrame
 *@param got_frame_ptr set to 1 if a frame was output
 *@param avpkt input packet
 *@return 0 for an empty packet, AVERROR_INVALIDDATA on error, otherwise the
 *        byte position of the packet bit reader
 */
static int decode_packet(AVCodecContext *avctx, WMAProDecodeCtx *s,
                         void *data, int *got_frame_ptr, AVPacket *avpkt)
{
    GetBitContext *gb   = &s->pgb;
    const int is_wmapro = avctx->codec_id == AV_CODEC_ID_WMAPRO;
    int bits_left;

    *got_frame_ptr = 0;

    if (!avpkt->size) {
        AVFrame *frame = data;
        int ch;

        /** Must output remaining samples after stream end. WMAPRO 5.1 created
         * by XWMA encoder don't though (maybe only 1/2ch streams need it). */
        s->packet_done = 0;
        if (s->eof_done)
            return 0;

        /** clean output buffer and copy last IMDCT samples */
        for (ch = 0; ch < s->nb_channels; ch++) {
            const size_t frame_bytes = s->samples_per_frame *
                                       sizeof(*s->channel[ch].out);

            memset(frame->extended_data[ch], 0, frame_bytes);
            memcpy(frame->extended_data[ch], s->channel[ch].out,
                   frame_bytes >> 1);
        }

        /* TODO: XMA should output 128 samples only (instead of 512) and WMAPRO
         * maybe 768 (with 2048), XMA needs changes in multi-stream handling though. */

        s->eof_done    = 1;
        s->packet_done = 1;
        *got_frame_ptr = 1;
        return 0;
    }

    if (s->packet_done || s->packet_loss) {
        int payload_size;
        int num_bits_prev_frame;
        int packet_sequence_number;

        s->packet_done = 0;

        /** sanity check for the buffer length */
        if (is_wmapro && avpkt->size < avctx->block_align) {
            av_log(avctx, AV_LOG_ERROR, "Input packet too small (%d < %d)\n",
                   avpkt->size, avctx->block_align);
            s->packet_loss = 1;
            return AVERROR_INVALIDDATA;
        }

        /** at most block_align bytes belong to this packet, whatever
            follows is remembered as the start of the next one */
        payload_size         = is_wmapro ? avctx->block_align
                                         : FFMIN(avpkt->size, avctx->block_align);
        s->next_packet_start = avpkt->size - payload_size;
        s->buf_bit_size      = payload_size << 3;

        /** parse packet header */
        init_get_bits(gb, avpkt->data, s->buf_bit_size);
        if (avctx->codec_id == AV_CODEC_ID_XMA2) {
            int num_frames = get_bits(gb, 6);
            ff_dlog(avctx, "packet[%d]: number of frames %d\n", avctx->frame_number, num_frames);
            packet_sequence_number = 0;
        } else {
            packet_sequence_number = get_bits(gb, 4);
            skip_bits(gb, 2);
        }

        /** get number of bits that need to be added to the previous frame */
        num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
        if (!is_wmapro) {
            skip_bits(gb, 3);
            s->skip_packets = get_bits(gb, 8);
            ff_dlog(avctx, "packet[%d]: skip packets %d\n", avctx->frame_number, s->skip_packets);
        }

        ff_dlog(avctx, "packet[%d]: nbpf %x\n", avctx->frame_number,
                num_bits_prev_frame);

        /** check for packet loss */
        if (is_wmapro && !s->packet_loss &&
            ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
            s->packet_loss = 1;
            av_log(avctx, AV_LOG_ERROR,
                   "Packet loss detected! seq %"PRIx8" vs %x\n",
                   s->packet_sequence_number, packet_sequence_number);
        }
        s->packet_sequence_number = packet_sequence_number;

        if (num_bits_prev_frame > 0) {
            const int remaining_packet_bits = remaining_bits(s, gb);

            /** the previous frame claims the whole rest of this packet */
            if (num_bits_prev_frame >= remaining_packet_bits) {
                num_bits_prev_frame = remaining_packet_bits;
                s->packet_done = 1;
            }

            /** append the previous frame data to the remaining data from the
                previous packet to create a full frame */
            save_bits(s, gb, num_bits_prev_frame, 1);
            ff_dlog(avctx, "accumulated %x bits of frame data\n",
                    s->num_saved_bits - s->frame_offset);

            /** decode the cross packet frame if it is valid */
            if (!s->packet_loss)
                decode_frame(s, data, got_frame_ptr);
        } else if (s->num_saved_bits - s->frame_offset) {
            ff_dlog(avctx, "ignoring %x previously saved bits\n",
                    s->num_saved_bits - s->frame_offset);
        }

        if (s->packet_loss) {
            /** reset number of saved bits so that the decoder
                does not start to decode incomplete frames in the
                s->len_prefix == 0 case */
            s->num_saved_bits = 0;
            s->packet_loss = 0;
        }
    } else {
        int frame_size = 0;
        int has_prefixed_frame;

        if (avpkt->size < s->next_packet_start) {
            s->packet_loss = 1;
            return AVERROR_INVALIDDATA;
        }

        s->buf_bit_size = (avpkt->size - s->next_packet_start) << 3;
        init_get_bits(gb, avpkt->data, s->buf_bit_size);
        skip_bits(gb, s->packet_offset);

        /** a length prefixed frame is usable when its non-zero size fits
            into what is left of the packet */
        has_prefixed_frame =
            s->len_prefix &&
            remaining_bits(s, gb) > s->log2_frame_size &&
            (frame_size = show_bits(gb, s->log2_frame_size)) &&
            frame_size <= remaining_bits(s, gb);

        if (has_prefixed_frame) {
            save_bits(s, gb, frame_size, 0);
            if (!s->packet_loss)
                s->packet_done = !decode_frame(s, data, got_frame_ptr);
        } else if (!s->len_prefix &&
                   s->num_saved_bits > get_bits_count(&s->gb)) {
            /** when the frames do not have a length prefix, we don't know
                the compressed length of the individual frames
                however, we know what part of a new packet belongs to the
                previous frame
                therefore we save the incoming packet first, then we append
                the "previous frame" data from the next packet so that
                we get a buffer that only contains full frames */
            s->packet_done = !decode_frame(s, data, got_frame_ptr);
        } else {
            s->packet_done = 1;
        }
    }

    bits_left = remaining_bits(s, gb);
    if (bits_left < 0) {
        av_log(avctx, AV_LOG_ERROR, "Overread %d\n", -bits_left);
        s->packet_loss = 1;
    }

    /** save the rest of the data so that it can be decoded
        with the next packet */
    if (s->packet_done && !s->packet_loss && bits_left > 0)
        save_bits(s, gb, bits_left, 0);

    s->packet_offset = get_bits_count(gb) & 7;

    return s->packet_loss ? AVERROR_INVALIDDATA : get_bits_count(gb) >> 3;
}
1775 
1776 /**
1777  *@brief Decode a single WMA packet.
1778  *@param avctx codec context
1779  *@param data the output buffer
1780  *@param avpkt input packet
1781  *@return number of bytes that were read from the input buffer
1782  */
wmapro_decode_packet(AVCodecContext * avctx,void * data,int * got_frame_ptr,AVPacket * avpkt)1783 static int wmapro_decode_packet(AVCodecContext *avctx, void *data,
1784                                 int *got_frame_ptr, AVPacket *avpkt)
1785 {
1786     WMAProDecodeCtx *s = avctx->priv_data;
1787     AVFrame *frame = data;
1788     int ret;
1789 
1790     /* get output buffer */
1791     frame->nb_samples = s->samples_per_frame;
1792     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
1793         s->packet_loss = 1;
1794         return 0;
1795     }
1796 
1797     return decode_packet(avctx, s, data, got_frame_ptr, avpkt);
1798 }
1799 
/**
 *@brief Decode one XMA packet and output interleaved stream data.
 *
 * The packet is decoded by the stream that currently owns it; the decoded
 * 512 sample frame is stored in the per-channel sample buffer. Once the
 * owner finished (or lost) its packet, the next owner is selected and all
 * frames that every stream has already produced are copied to the output.
 *
 *@param avctx codec context
 *@param data output AVFrame
 *@param got_frame_ptr set to 1 when samples were copied to the output frame
 *@param avpkt input packet
 *@return return value of decode_packet(), or a negative error code
 */
static int xma_decode_packet(AVCodecContext *avctx, void *data,
                             int *got_frame_ptr, AVPacket *avpkt)
{
    XMADecodeCtx *s          = avctx->priv_data;
    AVFrame *out             = data;
    const int stream         = s->current_stream;
    WMAProDecodeCtx *owner   = &s->xma[stream];
    AVFrame *stream_frame    = s->frames[stream];
    int got_stream_frame     = 0;
    int ready                = INT_MAX;
    int i, ret;

    /* the stream frame needs a (new) buffer whenever it holds no data */
    if (!stream_frame->data[0]) {
        stream_frame->nb_samples = 512;
        ret = ff_get_buffer(avctx, stream_frame, 0);
        if (ret < 0)
            return ret;
    }

    /* decode current stream packet */
    ret = decode_packet(avctx, owner, stream_frame, &got_stream_frame, avpkt);

    /* the sample buffer has room for 64 frames per stream only */
    if (got_stream_frame && s->offset[stream] >= 64) {
        got_stream_frame = 0;
        ret = AVERROR_INVALIDDATA;
    }

    if (got_stream_frame) {
        /* copy stream samples (1/2ch) to sample buffer (Nch) */
        const int first_ch = s->start_channel[stream];
        const int dst      = s->offset[stream] * 512;

        memcpy(&s->samples[first_ch + 0][dst],
               stream_frame->extended_data[0], 512 * 4);
        if (owner->nb_channels > 1)
            memcpy(&s->samples[first_ch + 1][dst],
                   stream_frame->extended_data[1], 512 * 4);
        s->offset[stream]++;
    } else if (ret < 0) {
        /* on error drop everything buffered and restart at stream 0 */
        memset(s->offset, 0, sizeof(s->offset));
        s->current_stream = 0;
        return ret;
    }

    /* the owner keeps the next packet while its current one is unfinished */
    if (!owner->packet_done && !owner->packet_loss)
        return ret;

    /* find next XMA packet's owner stream, and update.
     * XMA streams find their packets following packet_skips
     * (at start there is one packet per stream, then interleave non-linearly). */
    if (owner->skip_packets != 0) {
        /* select the first stream with the smallest skip_packets */
        int next = 0;

        for (i = 1; i < s->num_streams; i++) {
            if (s->xma[i].skip_packets < s->xma[next].skip_packets)
                next = i;
        }
        s->current_stream = next;
    }

    for (i = 0; i < s->num_streams; i++) {
        /* all other streams skip next packet */
        s->xma[i].skip_packets = FFMAX(0, s->xma[i].skip_packets - 1);

        /* number of frames that every stream has buffered */
        ready = FFMIN(ready, s->offset[i]);
    }

    /* copy samples from buffer to output if possible */
    if (ready > 0) {
        int bret;

        out->nb_samples = 512 * ready;
        bret = ff_get_buffer(avctx, out, 0);
        if (bret < 0)
            return bret;

        /* copy samples buffer (Nch) to frame samples (Nch), move unconsumed samples */
        for (i = 0; i < s->num_streams; i++) {
            const int first_ch = s->start_channel[i];
            const int stereo   = s->xma[i].nb_channels > 1;

            memcpy(out->extended_data[first_ch + 0],
                   s->samples[first_ch + 0], out->nb_samples * 4);
            if (stereo)
                memcpy(out->extended_data[first_ch + 1],
                       s->samples[first_ch + 1], out->nb_samples * 4);

            s->offset[i] -= ready;
            if (!s->offset[i])
                continue;

            memmove(s->samples[first_ch + 0],
                    s->samples[first_ch + 0] + out->nb_samples,
                    s->offset[i] * 4 * 512);
            if (stereo)
                memmove(s->samples[first_ch + 1],
                        s->samples[first_ch + 1] + out->nb_samples,
                        s->offset[i] * 4 * 512);
        }

        *got_frame_ptr = 1;
    }

    return ret;
}
1898 
xma_decode_init(AVCodecContext * avctx)1899 static av_cold int xma_decode_init(AVCodecContext *avctx)
1900 {
1901     XMADecodeCtx *s = avctx->priv_data;
1902     int i, ret, start_channels = 0;
1903 
1904     if (avctx->channels <= 0 || avctx->extradata_size == 0)
1905         return AVERROR_INVALIDDATA;
1906 
1907     /* get stream config */
1908     if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size == 34) { /* XMA2WAVEFORMATEX */
1909         s->num_streams = (avctx->channels + 1) / 2;
1910     } else if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size >= 2) { /* XMA2WAVEFORMAT */
1911         s->num_streams = avctx->extradata[1];
1912         if (avctx->extradata_size != (32 + ((avctx->extradata[0]==3)?0:8) + 4*s->num_streams)) {
1913             av_log(avctx, AV_LOG_ERROR, "Incorrect XMA2 extradata size\n");
1914             s->num_streams = 0;
1915             return AVERROR(EINVAL);
1916         }
1917     } else if (avctx->codec_id == AV_CODEC_ID_XMA1 && avctx->extradata_size >= 4) { /* XMAWAVEFORMAT */
1918         s->num_streams = avctx->extradata[4];
1919         if (avctx->extradata_size != (8 + 20*s->num_streams)) {
1920             av_log(avctx, AV_LOG_ERROR, "Incorrect XMA1 extradata size\n");
1921             s->num_streams = 0;
1922             return AVERROR(EINVAL);
1923         }
1924     } else {
1925         av_log(avctx, AV_LOG_ERROR, "Incorrect XMA config\n");
1926         return AVERROR(EINVAL);
1927     }
1928 
1929     /* encoder supports up to 64 streams / 64*2 channels (would have to alloc arrays) */
1930     if (avctx->channels > XMA_MAX_CHANNELS || s->num_streams > XMA_MAX_STREAMS ||
1931         s->num_streams <= 0
1932     ) {
1933         avpriv_request_sample(avctx, "More than %d channels in %d streams", XMA_MAX_CHANNELS, s->num_streams);
1934         s->num_streams = 0;
1935         return AVERROR_PATCHWELCOME;
1936     }
1937 
1938     /* init all streams (several streams of 1/2ch make Nch files) */
1939     for (i = 0; i < s->num_streams; i++) {
1940         ret = decode_init(&s->xma[i], avctx, i);
1941         if (ret < 0)
1942             return ret;
1943         s->frames[i] = av_frame_alloc();
1944         if (!s->frames[i])
1945             return AVERROR(ENOMEM);
1946 
1947         s->start_channel[i] = start_channels;
1948         start_channels += s->xma[i].nb_channels;
1949     }
1950     if (start_channels != avctx->channels)
1951         return AVERROR_INVALIDDATA;
1952 
1953     return ret;
1954 }
1955 
xma_decode_end(AVCodecContext * avctx)1956 static av_cold int xma_decode_end(AVCodecContext *avctx)
1957 {
1958     XMADecodeCtx *s = avctx->priv_data;
1959     int i;
1960 
1961     for (i = 0; i < s->num_streams; i++) {
1962         decode_end(&s->xma[i]);
1963         av_frame_free(&s->frames[i]);
1964     }
1965     s->num_streams = 0;
1966 
1967     return 0;
1968 }
1969 
/**
 *@brief Reset the state of one stream decoder.
 *@param s codec context
 */
static void flush(WMAProDecodeCtx *s)
{
    int ch;

    /** reset output buffer as a part of it is used during the windowing of a
        new frame */
    for (ch = 0; ch < s->nb_channels; ch++) {
        const size_t out_bytes = s->samples_per_frame *
                                 sizeof(*s->channel[ch].out);

        memset(s->channel[ch].out, 0, out_bytes);
    }

    s->eof_done     = 0;
    s->skip_packets = 0;
    s->packet_loss  = 1;
}
1982 
1983 
1984 /**
1985  *@brief Clear decoder buffers (for seeking).
1986  *@param avctx codec context
1987  */
wmapro_flush(AVCodecContext * avctx)1988 static void wmapro_flush(AVCodecContext *avctx)
1989 {
1990     WMAProDecodeCtx *s = avctx->priv_data;
1991 
1992     flush(s);
1993 }
1994 
/**
 *@brief Clear the buffers of all XMA streams (for seeking).
 *@param avctx codec context
 */
static void xma_flush(AVCodecContext *avctx)
{
    XMADecodeCtx *s = avctx->priv_data;
    int stream = s->num_streams;

    /* the streams are independent, so the reset order does not matter */
    while (stream-- > 0)
        flush(&s->xma[stream]);

    /* forget buffered frames and start again with the first stream */
    s->current_stream = 0;
    memset(s->offset, 0, sizeof(s->offset));
}
2006 
2007 
2008 /**
2009  *@brief wmapro decoder
2010  */
2011 AVCodec ff_wmapro_decoder = {
2012     .name           = "wmapro",
2013     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
2014     .type           = AVMEDIA_TYPE_AUDIO,
2015     .id             = AV_CODEC_ID_WMAPRO,
2016     .priv_data_size = sizeof(WMAProDecodeCtx),
2017     .init           = wmapro_decode_init,
2018     .close          = wmapro_decode_end,
2019     .decode         = wmapro_decode_packet,
2020     .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
2021     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
2022     .flush          = wmapro_flush,
2023     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
2024                                                       AV_SAMPLE_FMT_NONE },
2025 };
2026 
2027 AVCodec ff_xma1_decoder = {
2028     .name           = "xma1",
2029     .long_name      = NULL_IF_CONFIG_SMALL("Xbox Media Audio 1"),
2030     .type           = AVMEDIA_TYPE_AUDIO,
2031     .id             = AV_CODEC_ID_XMA1,
2032     .priv_data_size = sizeof(XMADecodeCtx),
2033     .init           = xma_decode_init,
2034     .close          = xma_decode_end,
2035     .decode         = xma_decode_packet,
2036     .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
2037     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
2038     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
2039                                                       AV_SAMPLE_FMT_NONE },
2040 };
2041 
2042 AVCodec ff_xma2_decoder = {
2043     .name           = "xma2",
2044     .long_name      = NULL_IF_CONFIG_SMALL("Xbox Media Audio 2"),
2045     .type           = AVMEDIA_TYPE_AUDIO,
2046     .id             = AV_CODEC_ID_XMA2,
2047     .priv_data_size = sizeof(XMADecodeCtx),
2048     .init           = xma_decode_init,
2049     .close          = xma_decode_end,
2050     .decode         = xma_decode_packet,
2051     .flush          = xma_flush,
2052     .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
2053     .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
2054     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
2055                                                       AV_SAMPLE_FMT_NONE },
2056 };
2057