1 /*
2 * Wmapro compatible decoder
3 * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
4 * Copyright (c) 2008 - 2011 Sascha Sommer, Benjamin Larsson
5 *
6 * This file is part of FFmpeg.
7 *
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 /**
24 * @file
25 * @brief wmapro decoder implementation
26 * Wmapro is an MDCT based codec comparable to wma standard or AAC.
27 * The decoding therefore consists of the following steps:
28 * - bitstream decoding
29 * - reconstruction of per-channel data
30 * - rescaling and inverse quantization
31 * - IMDCT
32 * - windowing and overlap-add
33 *
34 * The compressed wmapro bitstream is split into individual packets.
35 * Every such packet contains one or more wma frames.
36 * The compressed frames may have a variable length and frames may
37 * cross packet boundaries.
38 * Common to all wmapro frames is the number of samples that are stored in
39 * a frame.
40 * The number of samples and a few other decode flags are stored
41 * as extradata that has to be passed to the decoder.
42 *
43 * The wmapro frames themselves are again split into a variable number of
44 * subframes. Every subframe contains the data for 2^N time domain samples
45 * where N varies between 7 and 12.
46 *
47 * Example wmapro bitstream (in samples):
48 *
49 * ||   packet 0           || packet 1 || packet 2      packets
50 * ---------------------------------------------------
51 * || frame 0      || frame 1       || frame 2    ||    frames
52 * ---------------------------------------------------
53 * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
54 * ---------------------------------------------------
55 * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
56 * ---------------------------------------------------
57 *
58 * The frame layouts for the individual channels of a wma frame do not need
59 * to be the same.
60 *
61 * However, if the offsets and lengths of several subframes of a frame are the
62 * same, the subframes of the channels can be grouped.
63 * Every group may then use special coding techniques like M/S stereo coding
64 * to improve the compression ratio. These channel transformations do not
65 * need to be applied to a whole subframe. Instead, they can also work on
66 * individual scale factor bands (see below).
67 * The coefficients that carry the audio signal in the frequency domain
68 * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
69 * In addition to that, the encoder can switch to a runlevel coding scheme
70 * by transmitting subframe_length / 128 zero coefficients.
71 *
72 * Before the audio signal can be converted to the time domain, the
73 * coefficients have to be rescaled and inverse quantized.
74 * A subframe is therefore split into several scale factor bands that get
75 * scaled individually.
76 * Scale factors are submitted for every frame but they might be shared
77 * between the subframes of a channel. Scale factors are initially DPCM-coded.
78 * Once scale factors are shared, the differences are transmitted as runlevel
79 * codes.
80 * Every subframe length and offset combination in the frame layout shares a
81 * common quantization factor that can be adjusted for every channel by a
82 * modifier.
83 * After the inverse quantization, the coefficients get processed by an IMDCT.
84 * The resulting values are then windowed with a sine window and the first half
85 * of the values are added to the second half of the output from the previous
86 * subframe in order to reconstruct the output samples.
87 */
88
89 #include <inttypes.h>
90
91 #include "libavutil/audio_fifo.h"
92 #include "libavutil/ffmath.h"
93 #include "libavutil/float_dsp.h"
94 #include "libavutil/intfloat.h"
95 #include "libavutil/intreadwrite.h"
96 #include "libavutil/mem_internal.h"
97 #include "libavutil/thread.h"
98
99 #include "avcodec.h"
100 #include "codec_internal.h"
101 #include "internal.h"
102 #include "get_bits.h"
103 #include "put_bits.h"
104 #include "wmaprodata.h"
105 #include "sinewin.h"
106 #include "wma.h"
107 #include "wma_common.h"
108
/** limits of this decoder implementation */
#define WMAPRO_MAX_CHANNELS      8      ///< largest channel count handled per stream
#define MAX_SUBFRAMES           32      ///< largest subframe count per channel
#define MAX_BANDS               29      ///< largest scale factor band count
#define MAX_FRAMESIZE        32768      ///< largest compressed frame size
#define XMA_MAX_STREAMS          8
#define XMA_MAX_CHANNELS_STREAM  2
#define XMA_MAX_CHANNELS        (XMA_MAX_STREAMS * XMA_MAX_CHANNELS_STREAM)

#define WMAPRO_BLOCK_MIN_BITS    6                                              ///< log2 of the smallest block size
#define WMAPRO_BLOCK_MAX_BITS   13                                              ///< log2 of the largest block size
#define WMAPRO_BLOCK_MIN_SIZE   (1 << WMAPRO_BLOCK_MIN_BITS)                    ///< smallest block size
#define WMAPRO_BLOCK_MAX_SIZE   (1 << WMAPRO_BLOCK_MAX_BITS)                    ///< largest block size
#define WMAPRO_BLOCK_SIZES      (WMAPRO_BLOCK_MAX_BITS - WMAPRO_BLOCK_MIN_BITS + 1) ///< number of distinct block sizes


/* First-level lookup widths of the VLC tables and the resulting maximum
 * number of table lookups (ceil(max code length / lookup width)). */
#define VLCBITS          9
#define SCALEVLCBITS     8
#define VEC4MAXDEPTH     ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)
#define VEC2MAXDEPTH     ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)
#define VEC1MAXDEPTH     ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)
#define SCALEMAXDEPTH    ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS)
#define SCALERLMAXDEPTH  ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)
132
133 static VLC sf_vlc; ///< scale factor DPCM vlc
134 static VLC sf_rl_vlc; ///< scale factor run length vlc
135 static VLC vec4_vlc; ///< 4 coefficients per symbol
136 static VLC vec2_vlc; ///< 2 coefficients per symbol
137 static VLC vec1_vlc; ///< 1 coefficient per symbol
138 static VLC coef_vlc[2]; ///< coefficient run length vlc codes
139 static float sin64[33]; ///< sine table for decorrelation
140
141 /**
142 * @brief frame specific decoder context for a single channel
143 */
144 typedef struct WMAProChannelCtx {
145 int16_t prev_block_len; ///< length of the previous block
146 uint8_t transmit_coefs;
147 uint8_t num_subframes;
148 uint16_t subframe_len[MAX_SUBFRAMES]; ///< subframe length in samples
149 uint16_t subframe_offset[MAX_SUBFRAMES]; ///< subframe positions in the current frame
150 uint8_t cur_subframe; ///< current subframe number
151 uint16_t decoded_samples; ///< number of already processed samples
152 uint8_t grouped; ///< channel is part of a group
153 int quant_step; ///< quantization step for the current subframe
154 int8_t reuse_sf; ///< share scale factors between subframes
155 int8_t scale_factor_step; ///< scaling step for the current subframe
156 int max_scale_factor; ///< maximum scale factor for the current subframe
157 int saved_scale_factors[2][MAX_BANDS]; ///< resampled and (previously) transmitted scale factor values
158 int8_t scale_factor_idx; ///< index for the transmitted scale factor values (used for resampling)
159 int* scale_factors; ///< pointer to the scale factor values used for decoding
160 uint8_t table_idx; ///< index in sf_offsets for the scale factor reference block
161 float* coeffs; ///< pointer to the subframe decode buffer
162 uint16_t num_vec_coeffs; ///< number of vector coded coefficients
163 DECLARE_ALIGNED(32, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
164 } WMAProChannelCtx;
165
166 /**
167 * @brief channel group for channel transformations
168 */
169 typedef struct WMAProChannelGrp {
170 uint8_t num_channels; ///< number of channels in the group
171 int8_t transform; ///< transform on / off
172 int8_t transform_band[MAX_BANDS]; ///< controls if the transform is enabled for a certain band
173 float decorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS];
174 float* channel_data[WMAPRO_MAX_CHANNELS]; ///< transformation coefficients
175 } WMAProChannelGrp;
176
177 /**
178 * @brief main decoder context
179 */
180 typedef struct WMAProDecodeCtx {
181 /* generic decoder variables */
182 AVCodecContext* avctx; ///< codec context for av_log
183 AVFloatDSPContext *fdsp;
184 uint8_t frame_data[MAX_FRAMESIZE +
185 AV_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
186 PutBitContext pb; ///< context for filling the frame_data buffer
187 FFTContext mdct_ctx[WMAPRO_BLOCK_SIZES]; ///< MDCT context per block size
188 DECLARE_ALIGNED(32, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
189 const float* windows[WMAPRO_BLOCK_SIZES]; ///< windows for the different block sizes
190
191 /* frame size dependent frame information (set during initialization) */
192 uint32_t decode_flags; ///< used compression features
193 uint8_t len_prefix; ///< frame is prefixed with its length
194 uint8_t dynamic_range_compression; ///< frame contains DRC data
195 uint8_t bits_per_sample; ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
196 uint16_t samples_per_frame; ///< number of samples to output
197 uint16_t trim_start; ///< number of samples to skip at start
198 uint16_t trim_end; ///< number of samples to skip at end
199 uint16_t log2_frame_size;
200 int8_t lfe_channel; ///< lfe channel index
201 uint8_t max_num_subframes;
202 uint8_t subframe_len_bits; ///< number of bits used for the subframe length
203 uint8_t max_subframe_len_bit; ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
204 uint16_t min_samples_per_subframe;
205 int8_t num_sfb[WMAPRO_BLOCK_SIZES]; ///< scale factor bands per block size
206 int16_t sfb_offsets[WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor band offsets (multiples of 4)
207 int8_t sf_offsets[WMAPRO_BLOCK_SIZES][WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
208 int16_t subwoofer_cutoffs[WMAPRO_BLOCK_SIZES]; ///< subwoofer cutoff values
209
210 /* packet decode state */
211 GetBitContext pgb; ///< bitstream reader context for the packet
212 int next_packet_start; ///< start offset of the next wma packet in the demuxer packet
213 uint8_t packet_offset; ///< frame offset in the packet
214 uint8_t packet_sequence_number; ///< current packet number
215 int num_saved_bits; ///< saved number of bits
216 int frame_offset; ///< frame offset in the bit reservoir
217 int subframe_offset; ///< subframe offset in the bit reservoir
218 uint8_t packet_loss; ///< set in case of bitstream error
219 uint8_t packet_done; ///< set when a packet is fully decoded
220 uint8_t eof_done; ///< set when EOF reached and extra subframe is written (XMA1/2)
221
222 /* frame decode state */
223 uint32_t frame_num; ///< current frame number (not used for decoding)
224 GetBitContext gb; ///< bitstream reader context
225 int buf_bit_size; ///< buffer size in bits
226 uint8_t drc_gain; ///< gain for the DRC tool
227 int8_t skip_frame; ///< skip output step
228 int8_t parsed_all_subframes; ///< all subframes decoded?
229 uint8_t skip_packets; ///< packets to skip to find next packet in a stream (XMA1/2)
230
231 /* subframe/block decode state */
232 int16_t subframe_len; ///< current subframe length
233 int8_t nb_channels; ///< number of channels in stream (XMA1/2)
234 int8_t channels_for_cur_subframe; ///< number of channels that contain the subframe
235 int8_t channel_indexes_for_cur_subframe[WMAPRO_MAX_CHANNELS];
236 int8_t num_bands; ///< number of scale factor bands
237 int8_t transmit_num_vec_coeffs; ///< number of vector coded coefficients is part of the bitstream
238 int16_t* cur_sfb_offsets; ///< sfb offsets for the current block
239 uint8_t table_idx; ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
240 int8_t esc_len; ///< length of escaped coefficients
241
242 uint8_t num_chgroups; ///< number of channel groups
243 WMAProChannelGrp chgroup[WMAPRO_MAX_CHANNELS]; ///< channel group information
244
245 WMAProChannelCtx channel[WMAPRO_MAX_CHANNELS]; ///< per channel data
246 } WMAProDecodeCtx;
247
248 typedef struct XMADecodeCtx {
249 WMAProDecodeCtx xma[XMA_MAX_STREAMS];
250 AVFrame *frames[XMA_MAX_STREAMS];
251 int current_stream;
252 int num_streams;
253 AVAudioFifo *samples[2][XMA_MAX_STREAMS];
254 int start_channel[XMA_MAX_STREAMS];
255 int trim_start, trim_end;
256 int flushed;
257 } XMADecodeCtx;
258
259 /**
260 *@brief helper function to print the most important members of the context
261 *@param s context
262 */
dump_context(WMAProDecodeCtx * s)263 static av_cold void dump_context(WMAProDecodeCtx *s)
264 {
265 #define PRINT(a, b) av_log(s->avctx, AV_LOG_DEBUG, " %s = %d\n", a, b);
266 #define PRINT_HEX(a, b) av_log(s->avctx, AV_LOG_DEBUG, " %s = %"PRIx32"\n", a, b);
267
268 PRINT("ed sample bit depth", s->bits_per_sample);
269 PRINT_HEX("ed decode flags", s->decode_flags);
270 PRINT("samples per frame", s->samples_per_frame);
271 PRINT("log2 frame size", s->log2_frame_size);
272 PRINT("max num subframes", s->max_num_subframes);
273 PRINT("len prefix", s->len_prefix);
274 PRINT("num channels", s->nb_channels);
275 }
276
277 /**
278 *@brief Uninitialize the decoder and free all resources.
279 *@param avctx codec context
280 *@return 0 on success, < 0 otherwise
281 */
decode_end(WMAProDecodeCtx * s)282 static av_cold int decode_end(WMAProDecodeCtx *s)
283 {
284 int i;
285
286 av_freep(&s->fdsp);
287
288 for (i = 0; i < WMAPRO_BLOCK_SIZES; i++)
289 ff_mdct_end(&s->mdct_ctx[i]);
290
291 return 0;
292 }
293
wmapro_decode_end(AVCodecContext * avctx)294 static av_cold int wmapro_decode_end(AVCodecContext *avctx)
295 {
296 WMAProDecodeCtx *s = avctx->priv_data;
297
298 decode_end(s);
299
300 return 0;
301 }
302
get_rate(AVCodecContext * avctx)303 static av_cold int get_rate(AVCodecContext *avctx)
304 {
305 if (avctx->codec_id != AV_CODEC_ID_WMAPRO) { // XXX: is this really only for XMA?
306 if (avctx->sample_rate > 44100)
307 return 48000;
308 else if (avctx->sample_rate > 32000)
309 return 44100;
310 else if (avctx->sample_rate > 24000)
311 return 32000;
312 return 24000;
313 }
314
315 return avctx->sample_rate;
316 }
317
decode_init_static(void)318 static av_cold void decode_init_static(void)
319 {
320 INIT_VLC_STATIC(&sf_vlc, SCALEVLCBITS, HUFF_SCALE_SIZE,
321 scale_huffbits, 1, 1,
322 scale_huffcodes, 2, 2, 616);
323 INIT_VLC_STATIC(&sf_rl_vlc, VLCBITS, HUFF_SCALE_RL_SIZE,
324 scale_rl_huffbits, 1, 1,
325 scale_rl_huffcodes, 4, 4, 1406);
326 INIT_VLC_STATIC(&coef_vlc[0], VLCBITS, HUFF_COEF0_SIZE,
327 coef0_huffbits, 1, 1,
328 coef0_huffcodes, 4, 4, 2108);
329 INIT_VLC_STATIC(&coef_vlc[1], VLCBITS, HUFF_COEF1_SIZE,
330 coef1_huffbits, 1, 1,
331 coef1_huffcodes, 4, 4, 3912);
332 INIT_VLC_STATIC(&vec4_vlc, VLCBITS, HUFF_VEC4_SIZE,
333 vec4_huffbits, 1, 1,
334 vec4_huffcodes, 2, 2, 604);
335 INIT_VLC_STATIC(&vec2_vlc, VLCBITS, HUFF_VEC2_SIZE,
336 vec2_huffbits, 1, 1,
337 vec2_huffcodes, 2, 2, 562);
338 INIT_VLC_STATIC(&vec1_vlc, VLCBITS, HUFF_VEC1_SIZE,
339 vec1_huffbits, 1, 1,
340 vec1_huffcodes, 2, 2, 562);
341
342 /** calculate sine values for the decorrelation matrix */
343 for (int i = 0; i < 33; i++)
344 sin64[i] = sin(i * M_PI / 64.0);
345
346 for (int i = WMAPRO_BLOCK_MIN_BITS; i <= WMAPRO_BLOCK_MAX_BITS; i++)
347 ff_init_ff_sine_windows(i);
348 }
349
350 /**
351 *@brief Initialize the decoder.
352 *@param avctx codec context
353 *@return 0 on success, -1 otherwise
354 */
decode_init(WMAProDecodeCtx * s,AVCodecContext * avctx,int num_stream)355 static av_cold int decode_init(WMAProDecodeCtx *s, AVCodecContext *avctx, int num_stream)
356 {
357 static AVOnce init_static_once = AV_ONCE_INIT;
358 uint8_t *edata_ptr = avctx->extradata;
359 unsigned int channel_mask;
360 int i, bits, ret;
361 int log2_max_num_subframes;
362 int num_possible_block_sizes;
363
364 if (avctx->codec_id == AV_CODEC_ID_XMA1 || avctx->codec_id == AV_CODEC_ID_XMA2)
365 avctx->block_align = 2048;
366
367 if (!avctx->block_align) {
368 av_log(avctx, AV_LOG_ERROR, "block_align is not set\n");
369 return AVERROR(EINVAL);
370 }
371
372 s->avctx = avctx;
373
374 init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
375
376 avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
377
378 /** dump the extradata */
379 av_log(avctx, AV_LOG_DEBUG, "extradata:\n");
380 for (i = 0; i < avctx->extradata_size; i++)
381 av_log(avctx, AV_LOG_DEBUG, "[%x] ", avctx->extradata[i]);
382 av_log(avctx, AV_LOG_DEBUG, "\n");
383
384 if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size == 34) { /* XMA2WAVEFORMATEX */
385 s->decode_flags = 0x10d6;
386 s->bits_per_sample = 16;
387 channel_mask = 0; //AV_RL32(edata_ptr+2); /* not always in expected order */
388 if ((num_stream+1) * XMA_MAX_CHANNELS_STREAM > avctx->ch_layout.nb_channels) /* stream config is 2ch + 2ch + ... + 1/2ch */
389 s->nb_channels = 1;
390 else
391 s->nb_channels = 2;
392 } else if (avctx->codec_id == AV_CODEC_ID_XMA2) { /* XMA2WAVEFORMAT */
393 s->decode_flags = 0x10d6;
394 s->bits_per_sample = 16;
395 channel_mask = 0; /* would need to aggregate from all streams */
396 s->nb_channels = edata_ptr[32 + ((edata_ptr[0]==3)?0:8) + 4*num_stream + 0]; /* nth stream config */
397 } else if (avctx->codec_id == AV_CODEC_ID_XMA1) { /* XMAWAVEFORMAT */
398 s->decode_flags = 0x10d6;
399 s->bits_per_sample = 16;
400 channel_mask = 0; /* would need to aggregate from all streams */
401 s->nb_channels = edata_ptr[8 + 20*num_stream + 17]; /* nth stream config */
402 } else if (avctx->codec_id == AV_CODEC_ID_WMAPRO && avctx->extradata_size >= 18) {
403 s->decode_flags = AV_RL16(edata_ptr+14);
404 channel_mask = AV_RL32(edata_ptr+2);
405 s->bits_per_sample = AV_RL16(edata_ptr);
406 s->nb_channels = channel_mask ? av_popcount(channel_mask) : avctx->ch_layout.nb_channels;
407
408 if (s->bits_per_sample > 32 || s->bits_per_sample < 1) {
409 avpriv_request_sample(avctx, "bits per sample is %d", s->bits_per_sample);
410 return AVERROR_PATCHWELCOME;
411 }
412 } else {
413 avpriv_request_sample(avctx, "Unknown extradata size");
414 return AVERROR_PATCHWELCOME;
415 }
416
417 /** generic init */
418 s->log2_frame_size = av_log2(avctx->block_align) + 4;
419 if (s->log2_frame_size > 25) {
420 avpriv_request_sample(avctx, "Large block align");
421 return AVERROR_PATCHWELCOME;
422 }
423
424 /** frame info */
425 s->skip_frame = 1; /* skip first frame */
426
427 s->packet_loss = 1;
428 s->len_prefix = (s->decode_flags & 0x40);
429
430 /** get frame len */
431 if (avctx->codec_id == AV_CODEC_ID_WMAPRO) {
432 bits = ff_wma_get_frame_len_bits(avctx->sample_rate, 3, s->decode_flags);
433 if (bits > WMAPRO_BLOCK_MAX_BITS) {
434 avpriv_request_sample(avctx, "14-bit block sizes");
435 return AVERROR_PATCHWELCOME;
436 }
437 s->samples_per_frame = 1 << bits;
438 } else {
439 s->samples_per_frame = 512;
440 }
441
442 /** subframe info */
443 log2_max_num_subframes = ((s->decode_flags & 0x38) >> 3);
444 s->max_num_subframes = 1 << log2_max_num_subframes;
445 if (s->max_num_subframes == 16 || s->max_num_subframes == 4)
446 s->max_subframe_len_bit = 1;
447 s->subframe_len_bits = av_log2(log2_max_num_subframes) + 1;
448
449 num_possible_block_sizes = log2_max_num_subframes + 1;
450 s->min_samples_per_subframe = s->samples_per_frame / s->max_num_subframes;
451 s->dynamic_range_compression = (s->decode_flags & 0x80);
452
453 if (s->max_num_subframes > MAX_SUBFRAMES) {
454 av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %"PRId8"\n",
455 s->max_num_subframes);
456 return AVERROR_INVALIDDATA;
457 }
458
459 if (s->min_samples_per_subframe < WMAPRO_BLOCK_MIN_SIZE) {
460 av_log(avctx, AV_LOG_ERROR, "min_samples_per_subframe of %d too small\n",
461 s->min_samples_per_subframe);
462 return AVERROR_INVALIDDATA;
463 }
464
465 if (s->avctx->sample_rate <= 0) {
466 av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
467 return AVERROR_INVALIDDATA;
468 }
469
470 if (s->nb_channels <= 0) {
471 av_log(avctx, AV_LOG_ERROR, "invalid number of channels %d\n",
472 s->nb_channels);
473 return AVERROR_INVALIDDATA;
474 } else if (avctx->codec_id != AV_CODEC_ID_WMAPRO && s->nb_channels > XMA_MAX_CHANNELS_STREAM) {
475 av_log(avctx, AV_LOG_ERROR, "invalid number of channels per XMA stream %d\n",
476 s->nb_channels);
477 return AVERROR_INVALIDDATA;
478 } else if (s->nb_channels > WMAPRO_MAX_CHANNELS || s->nb_channels > avctx->ch_layout.nb_channels) {
479 avpriv_request_sample(avctx,
480 "More than %d channels", WMAPRO_MAX_CHANNELS);
481 return AVERROR_PATCHWELCOME;
482 }
483
484 /** init previous block len */
485 for (i = 0; i < s->nb_channels; i++)
486 s->channel[i].prev_block_len = s->samples_per_frame;
487
488 /** extract lfe channel position */
489 s->lfe_channel = -1;
490
491 if (channel_mask & 8) {
492 unsigned int mask;
493 for (mask = 1; mask < 16; mask <<= 1) {
494 if (channel_mask & mask)
495 ++s->lfe_channel;
496 }
497 }
498
499 /** calculate number of scale factor bands and their offsets
500 for every possible block size */
501 for (i = 0; i < num_possible_block_sizes; i++) {
502 int subframe_len = s->samples_per_frame >> i;
503 int x;
504 int band = 1;
505 int rate = get_rate(avctx);
506
507 s->sfb_offsets[i][0] = 0;
508
509 for (x = 0; x < MAX_BANDS-1 && s->sfb_offsets[i][band - 1] < subframe_len; x++) {
510 int offset = (subframe_len * 2 * critical_freq[x]) / rate + 2;
511 offset &= ~3;
512 if (offset > s->sfb_offsets[i][band - 1])
513 s->sfb_offsets[i][band++] = offset;
514
515 if (offset >= subframe_len)
516 break;
517 }
518 s->sfb_offsets[i][band - 1] = subframe_len;
519 s->num_sfb[i] = band - 1;
520 if (s->num_sfb[i] <= 0) {
521 av_log(avctx, AV_LOG_ERROR, "num_sfb invalid\n");
522 return AVERROR_INVALIDDATA;
523 }
524 }
525
526
527 /** Scale factors can be shared between blocks of different size
528 as every block has a different scale factor band layout.
529 The matrix sf_offsets is needed to find the correct scale factor.
530 */
531
532 for (i = 0; i < num_possible_block_sizes; i++) {
533 int b;
534 for (b = 0; b < s->num_sfb[i]; b++) {
535 int x;
536 int offset = ((s->sfb_offsets[i][b]
537 + s->sfb_offsets[i][b + 1] - 1) << i) >> 1;
538 for (x = 0; x < num_possible_block_sizes; x++) {
539 int v = 0;
540 while (s->sfb_offsets[x][v + 1] << x < offset) {
541 v++;
542 av_assert0(v < MAX_BANDS);
543 }
544 s->sf_offsets[i][x][b] = v;
545 }
546 }
547 }
548
549 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
550 if (!s->fdsp)
551 return AVERROR(ENOMEM);
552
553 /** init MDCT, FIXME: only init needed sizes */
554 for (int i = 0; i < WMAPRO_BLOCK_SIZES; i++) {
555 ret = ff_mdct_init(&s->mdct_ctx[i], WMAPRO_BLOCK_MIN_BITS + 1 + i, 1,
556 1.0 / (1 << (WMAPRO_BLOCK_MIN_BITS + i - 1))
557 / (1ll << (s->bits_per_sample - 1)));
558 if (ret < 0)
559 return ret;
560 }
561
562 /** init MDCT windows: simple sine window */
563 for (i = 0; i < WMAPRO_BLOCK_SIZES; i++) {
564 const int win_idx = WMAPRO_BLOCK_MAX_BITS - i;
565 s->windows[WMAPRO_BLOCK_SIZES - i - 1] = ff_sine_windows[win_idx];
566 }
567
568 /** calculate subwoofer cutoff values */
569 for (i = 0; i < num_possible_block_sizes; i++) {
570 int block_size = s->samples_per_frame >> i;
571 int cutoff = (440*block_size + 3LL * (s->avctx->sample_rate >> 1) - 1)
572 / s->avctx->sample_rate;
573 s->subwoofer_cutoffs[i] = av_clip(cutoff, 4, block_size);
574 }
575
576 if (avctx->debug & FF_DEBUG_BITSTREAM)
577 dump_context(s);
578
579 if (avctx->codec_id == AV_CODEC_ID_WMAPRO) {
580 if (channel_mask) {
581 av_channel_layout_uninit(&avctx->ch_layout);
582 av_channel_layout_from_mask(&avctx->ch_layout, channel_mask);
583 } else
584 avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
585 }
586
587 ff_thread_once(&init_static_once, decode_init_static);
588
589 return 0;
590 }
591
592 /**
593 *@brief Initialize the decoder.
594 *@param avctx codec context
595 *@return 0 on success, -1 otherwise
596 */
wmapro_decode_init(AVCodecContext * avctx)597 static av_cold int wmapro_decode_init(AVCodecContext *avctx)
598 {
599 WMAProDecodeCtx *s = avctx->priv_data;
600
601 return decode_init(s, avctx, 0);
602 }
603
604 /**
605 *@brief Decode the subframe length.
606 *@param s context
607 *@param offset sample offset in the frame
608 *@return decoded subframe length on success, < 0 in case of an error
609 */
decode_subframe_length(WMAProDecodeCtx * s,int offset)610 static int decode_subframe_length(WMAProDecodeCtx *s, int offset)
611 {
612 int frame_len_shift = 0;
613 int subframe_len;
614
615 /** no need to read from the bitstream when only one length is possible */
616 if (offset == s->samples_per_frame - s->min_samples_per_subframe)
617 return s->min_samples_per_subframe;
618
619 if (get_bits_left(&s->gb) < 1)
620 return AVERROR_INVALIDDATA;
621
622 /** 1 bit indicates if the subframe is of maximum length */
623 if (s->max_subframe_len_bit) {
624 if (get_bits1(&s->gb))
625 frame_len_shift = 1 + get_bits(&s->gb, s->subframe_len_bits-1);
626 } else
627 frame_len_shift = get_bits(&s->gb, s->subframe_len_bits);
628
629 subframe_len = s->samples_per_frame >> frame_len_shift;
630
631 /** sanity check the length */
632 if (subframe_len < s->min_samples_per_subframe ||
633 subframe_len > s->samples_per_frame) {
634 av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
635 subframe_len);
636 return AVERROR_INVALIDDATA;
637 }
638 return subframe_len;
639 }
640
641 /**
642 *@brief Decode how the data in the frame is split into subframes.
643 * Every WMA frame contains the encoded data for a fixed number of
644 * samples per channel. The data for every channel might be split
645 * into several subframes. This function will reconstruct the list of
646 * subframes for every channel.
647 *
648 * If the subframes are not evenly split, the algorithm estimates the
649 * channels with the lowest number of total samples.
650 * Afterwards, for each of these channels a bit is read from the
651 * bitstream that indicates if the channel contains a subframe with the
652 * next subframe size that is going to be read from the bitstream or not.
653 * If a channel contains such a subframe, the subframe size gets added to
654 * the channel's subframe list.
655 * The algorithm repeats these steps until the frame is properly divided
656 * between the individual channels.
657 *
658 *@param s context
659 *@return 0 on success, < 0 in case of an error
660 */
decode_tilehdr(WMAProDecodeCtx * s)661 static int decode_tilehdr(WMAProDecodeCtx *s)
662 {
663 uint16_t num_samples[WMAPRO_MAX_CHANNELS] = { 0 };/**< sum of samples for all currently known subframes of a channel */
664 uint8_t contains_subframe[WMAPRO_MAX_CHANNELS]; /**< flag indicating if a channel contains the current subframe */
665 int channels_for_cur_subframe = s->nb_channels; /**< number of channels that contain the current subframe */
666 int fixed_channel_layout = 0; /**< flag indicating that all channels use the same subframe offsets and sizes */
667 int min_channel_len = 0; /**< smallest sum of samples (channels with this length will be processed first) */
668 int c;
669
670 /* Should never consume more than 3073 bits (256 iterations for the
671 * while loop when always the minimum amount of 128 samples is subtracted
672 * from missing samples in the 8 channel case).
673 * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS + 4)
674 */
675
676 /** reset tiling information */
677 for (c = 0; c < s->nb_channels; c++)
678 s->channel[c].num_subframes = 0;
679
680 if (s->max_num_subframes == 1 || get_bits1(&s->gb))
681 fixed_channel_layout = 1;
682
683 /** loop until the frame data is split between the subframes */
684 do {
685 int subframe_len;
686
687 /** check which channels contain the subframe */
688 for (c = 0; c < s->nb_channels; c++) {
689 if (num_samples[c] == min_channel_len) {
690 if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
691 (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe))
692 contains_subframe[c] = 1;
693 else
694 contains_subframe[c] = get_bits1(&s->gb);
695 } else
696 contains_subframe[c] = 0;
697 }
698
699 /** get subframe length, subframe_len == 0 is not allowed */
700 if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
701 return AVERROR_INVALIDDATA;
702
703 /** add subframes to the individual channels and find new min_channel_len */
704 min_channel_len += subframe_len;
705 for (c = 0; c < s->nb_channels; c++) {
706 WMAProChannelCtx* chan = &s->channel[c];
707
708 if (contains_subframe[c]) {
709 if (chan->num_subframes >= MAX_SUBFRAMES) {
710 av_log(s->avctx, AV_LOG_ERROR,
711 "broken frame: num subframes > 31\n");
712 return AVERROR_INVALIDDATA;
713 }
714 chan->subframe_len[chan->num_subframes] = subframe_len;
715 num_samples[c] += subframe_len;
716 ++chan->num_subframes;
717 if (num_samples[c] > s->samples_per_frame) {
718 av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
719 "channel len > samples_per_frame\n");
720 return AVERROR_INVALIDDATA;
721 }
722 } else if (num_samples[c] <= min_channel_len) {
723 if (num_samples[c] < min_channel_len) {
724 channels_for_cur_subframe = 0;
725 min_channel_len = num_samples[c];
726 }
727 ++channels_for_cur_subframe;
728 }
729 }
730 } while (min_channel_len < s->samples_per_frame);
731
732 for (c = 0; c < s->nb_channels; c++) {
733 int i;
734 int offset = 0;
735 for (i = 0; i < s->channel[c].num_subframes; i++) {
736 ff_dlog(s->avctx, "frame[%"PRIu32"] channel[%i] subframe[%i]"
737 " len %i\n", s->frame_num, c, i,
738 s->channel[c].subframe_len[i]);
739 s->channel[c].subframe_offset[i] = offset;
740 offset += s->channel[c].subframe_len[i];
741 }
742 }
743
744 return 0;
745 }
746
747 /**
748 *@brief Calculate a decorrelation matrix from the bitstream parameters.
749 *@param s codec context
750 *@param chgroup channel group for which the matrix needs to be calculated
751 */
decode_decorrelation_matrix(WMAProDecodeCtx * s,WMAProChannelGrp * chgroup)752 static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
753 WMAProChannelGrp *chgroup)
754 {
755 int i;
756 int offset = 0;
757 int8_t rotation_offset[WMAPRO_MAX_CHANNELS * WMAPRO_MAX_CHANNELS];
758 memset(chgroup->decorrelation_matrix, 0, s->nb_channels *
759 s->nb_channels * sizeof(*chgroup->decorrelation_matrix));
760
761 for (i = 0; i < chgroup->num_channels * (chgroup->num_channels - 1) >> 1; i++)
762 rotation_offset[i] = get_bits(&s->gb, 6);
763
764 for (i = 0; i < chgroup->num_channels; i++)
765 chgroup->decorrelation_matrix[chgroup->num_channels * i + i] =
766 get_bits1(&s->gb) ? 1.0 : -1.0;
767
768 for (i = 1; i < chgroup->num_channels; i++) {
769 int x;
770 for (x = 0; x < i; x++) {
771 int y;
772 for (y = 0; y < i + 1; y++) {
773 float v1 = chgroup->decorrelation_matrix[x * chgroup->num_channels + y];
774 float v2 = chgroup->decorrelation_matrix[i * chgroup->num_channels + y];
775 int n = rotation_offset[offset + x];
776 float sinv;
777 float cosv;
778
779 if (n < 32) {
780 sinv = sin64[n];
781 cosv = sin64[32 - n];
782 } else {
783 sinv = sin64[64 - n];
784 cosv = -sin64[n - 32];
785 }
786
787 chgroup->decorrelation_matrix[y + x * chgroup->num_channels] =
788 (v1 * sinv) - (v2 * cosv);
789 chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
790 (v1 * cosv) + (v2 * sinv);
791 }
792 }
793 offset += i;
794 }
795 }
796
797 /**
798 *@brief Decode channel transformation parameters
799 *@param s codec context
800 *@return >= 0 in case of success, < 0 in case of bitstream errors
801 */
decode_channel_transform(WMAProDecodeCtx * s)802 static int decode_channel_transform(WMAProDecodeCtx* s)
803 {
804 int i;
805 /* should never consume more than 1921 bits for the 8 channel case
806 * 1 + MAX_CHANNELS * (MAX_CHANNELS + 2 + 3 * MAX_CHANNELS * MAX_CHANNELS
807 * + MAX_CHANNELS + MAX_BANDS + 1)
808 */
809
810 /** in the one channel case channel transforms are pointless */
811 s->num_chgroups = 0;
812 if (s->nb_channels > 1) {
813 int remaining_channels = s->channels_for_cur_subframe;
814
815 if (get_bits1(&s->gb)) {
816 avpriv_request_sample(s->avctx,
817 "Channel transform bit");
818 return AVERROR_PATCHWELCOME;
819 }
820
821 for (s->num_chgroups = 0; remaining_channels &&
822 s->num_chgroups < s->channels_for_cur_subframe; s->num_chgroups++) {
823 WMAProChannelGrp* chgroup = &s->chgroup[s->num_chgroups];
824 float** channel_data = chgroup->channel_data;
825 chgroup->num_channels = 0;
826 chgroup->transform = 0;
827
828 /** decode channel mask */
829 if (remaining_channels > 2) {
830 for (i = 0; i < s->channels_for_cur_subframe; i++) {
831 int channel_idx = s->channel_indexes_for_cur_subframe[i];
832 if (!s->channel[channel_idx].grouped
833 && get_bits1(&s->gb)) {
834 ++chgroup->num_channels;
835 s->channel[channel_idx].grouped = 1;
836 *channel_data++ = s->channel[channel_idx].coeffs;
837 }
838 }
839 } else {
840 chgroup->num_channels = remaining_channels;
841 for (i = 0; i < s->channels_for_cur_subframe; i++) {
842 int channel_idx = s->channel_indexes_for_cur_subframe[i];
843 if (!s->channel[channel_idx].grouped)
844 *channel_data++ = s->channel[channel_idx].coeffs;
845 s->channel[channel_idx].grouped = 1;
846 }
847 }
848
849 /** decode transform type */
850 if (chgroup->num_channels == 2) {
851 if (get_bits1(&s->gb)) {
852 if (get_bits1(&s->gb)) {
853 avpriv_request_sample(s->avctx,
854 "Unknown channel transform type");
855 return AVERROR_PATCHWELCOME;
856 }
857 } else {
858 chgroup->transform = 1;
859 if (s->nb_channels == 2) {
860 chgroup->decorrelation_matrix[0] = 1.0;
861 chgroup->decorrelation_matrix[1] = -1.0;
862 chgroup->decorrelation_matrix[2] = 1.0;
863 chgroup->decorrelation_matrix[3] = 1.0;
864 } else {
865 /** cos(pi/4) */
866 chgroup->decorrelation_matrix[0] = 0.70703125;
867 chgroup->decorrelation_matrix[1] = -0.70703125;
868 chgroup->decorrelation_matrix[2] = 0.70703125;
869 chgroup->decorrelation_matrix[3] = 0.70703125;
870 }
871 }
872 } else if (chgroup->num_channels > 2) {
873 if (get_bits1(&s->gb)) {
874 chgroup->transform = 1;
875 if (get_bits1(&s->gb)) {
876 decode_decorrelation_matrix(s, chgroup);
877 } else {
878 /** FIXME: more than 6 coupled channels not supported */
879 if (chgroup->num_channels > 6) {
880 avpriv_request_sample(s->avctx,
881 "Coupled channels > 6");
882 } else {
883 memcpy(chgroup->decorrelation_matrix,
884 default_decorrelation[chgroup->num_channels],
885 chgroup->num_channels * chgroup->num_channels *
886 sizeof(*chgroup->decorrelation_matrix));
887 }
888 }
889 }
890 }
891
892 /** decode transform on / off */
893 if (chgroup->transform) {
894 if (!get_bits1(&s->gb)) {
895 int i;
896 /** transform can be enabled for individual bands */
897 for (i = 0; i < s->num_bands; i++) {
898 chgroup->transform_band[i] = get_bits1(&s->gb);
899 }
900 } else {
901 memset(chgroup->transform_band, 1, s->num_bands);
902 }
903 }
904 remaining_channels -= chgroup->num_channels;
905 }
906 }
907 return 0;
908 }
909
910 /**
911 *@brief Extract the coefficients from the bitstream.
912 *@param s codec context
913 *@param c current channel number
914 *@return 0 on success, < 0 in case of bitstream errors
915 */
decode_coeffs(WMAProDecodeCtx * s,int c)916 static int decode_coeffs(WMAProDecodeCtx *s, int c)
917 {
918 /* Integers 0..15 as single-precision floats. The table saves a
919 costly int to float conversion, and storing the values as
920 integers allows fast sign-flipping. */
921 static const uint32_t fval_tab[16] = {
922 0x00000000, 0x3f800000, 0x40000000, 0x40400000,
923 0x40800000, 0x40a00000, 0x40c00000, 0x40e00000,
924 0x41000000, 0x41100000, 0x41200000, 0x41300000,
925 0x41400000, 0x41500000, 0x41600000, 0x41700000,
926 };
927 int vlctable;
928 VLC* vlc;
929 WMAProChannelCtx* ci = &s->channel[c];
930 int rl_mode = 0;
931 int cur_coeff = 0;
932 int num_zeros = 0;
933 const uint16_t* run;
934 const float* level;
935
936 ff_dlog(s->avctx, "decode coefficients for channel %i\n", c);
937
938 vlctable = get_bits1(&s->gb);
939 vlc = &coef_vlc[vlctable];
940
941 if (vlctable) {
942 run = coef1_run;
943 level = coef1_level;
944 } else {
945 run = coef0_run;
946 level = coef0_level;
947 }
948
949 /** decode vector coefficients (consumes up to 167 bits per iteration for
950 4 vector coded large values) */
951 while ((s->transmit_num_vec_coeffs || !rl_mode) &&
952 (cur_coeff + 3 < ci->num_vec_coeffs)) {
953 uint32_t vals[4];
954 int i;
955 unsigned int idx;
956
957 idx = get_vlc2(&s->gb, vec4_vlc.table, VLCBITS, VEC4MAXDEPTH);
958
959 if (idx == HUFF_VEC4_SIZE - 1) {
960 for (i = 0; i < 4; i += 2) {
961 idx = get_vlc2(&s->gb, vec2_vlc.table, VLCBITS, VEC2MAXDEPTH);
962 if (idx == HUFF_VEC2_SIZE - 1) {
963 uint32_t v0, v1;
964 v0 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
965 if (v0 == HUFF_VEC1_SIZE - 1)
966 v0 += ff_wma_get_large_val(&s->gb);
967 v1 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
968 if (v1 == HUFF_VEC1_SIZE - 1)
969 v1 += ff_wma_get_large_val(&s->gb);
970 vals[i ] = av_float2int(v0);
971 vals[i+1] = av_float2int(v1);
972 } else {
973 vals[i] = fval_tab[symbol_to_vec2[idx] >> 4 ];
974 vals[i+1] = fval_tab[symbol_to_vec2[idx] & 0xF];
975 }
976 }
977 } else {
978 vals[0] = fval_tab[ symbol_to_vec4[idx] >> 12 ];
979 vals[1] = fval_tab[(symbol_to_vec4[idx] >> 8) & 0xF];
980 vals[2] = fval_tab[(symbol_to_vec4[idx] >> 4) & 0xF];
981 vals[3] = fval_tab[ symbol_to_vec4[idx] & 0xF];
982 }
983
984 /** decode sign */
985 for (i = 0; i < 4; i++) {
986 if (vals[i]) {
987 uint32_t sign = get_bits1(&s->gb) - 1;
988 AV_WN32A(&ci->coeffs[cur_coeff], vals[i] ^ sign << 31);
989 num_zeros = 0;
990 } else {
991 ci->coeffs[cur_coeff] = 0;
992 /** switch to run level mode when subframe_len / 128 zeros
993 were found in a row */
994 rl_mode |= (++num_zeros > s->subframe_len >> 8);
995 }
996 ++cur_coeff;
997 }
998 }
999
1000 /** decode run level coded coefficients */
1001 if (cur_coeff < s->subframe_len) {
1002 int ret;
1003
1004 memset(&ci->coeffs[cur_coeff], 0,
1005 sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff));
1006 ret = ff_wma_run_level_decode(s->avctx, &s->gb, vlc,
1007 level, run, 1, ci->coeffs,
1008 cur_coeff, s->subframe_len,
1009 s->subframe_len, s->esc_len, 0);
1010 if (ret < 0)
1011 return ret;
1012 }
1013
1014 return 0;
1015 }
1016
1017 /**
1018 *@brief Extract scale factors from the bitstream.
1019 *@param s codec context
1020 *@return 0 on success, < 0 in case of bitstream errors
1021 */
decode_scale_factors(WMAProDecodeCtx * s)1022 static int decode_scale_factors(WMAProDecodeCtx* s)
1023 {
1024 int i;
1025
1026 /** should never consume more than 5344 bits
1027 * MAX_CHANNELS * (1 + MAX_BANDS * 23)
1028 */
1029
1030 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1031 int c = s->channel_indexes_for_cur_subframe[i];
1032 int* sf;
1033 int* sf_end;
1034 s->channel[c].scale_factors = s->channel[c].saved_scale_factors[!s->channel[c].scale_factor_idx];
1035 sf_end = s->channel[c].scale_factors + s->num_bands;
1036
1037 /** resample scale factors for the new block size
1038 * as the scale factors might need to be resampled several times
1039 * before some new values are transmitted, a backup of the last
1040 * transmitted scale factors is kept in saved_scale_factors
1041 */
1042 if (s->channel[c].reuse_sf) {
1043 const int8_t* sf_offsets = s->sf_offsets[s->table_idx][s->channel[c].table_idx];
1044 int b;
1045 for (b = 0; b < s->num_bands; b++)
1046 s->channel[c].scale_factors[b] =
1047 s->channel[c].saved_scale_factors[s->channel[c].scale_factor_idx][*sf_offsets++];
1048 }
1049
1050 if (!s->channel[c].cur_subframe || get_bits1(&s->gb)) {
1051
1052 if (!s->channel[c].reuse_sf) {
1053 int val;
1054 /** decode DPCM coded scale factors */
1055 s->channel[c].scale_factor_step = get_bits(&s->gb, 2) + 1;
1056 val = 45 / s->channel[c].scale_factor_step;
1057 for (sf = s->channel[c].scale_factors; sf < sf_end; sf++) {
1058 val += get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS, SCALEMAXDEPTH) - 60;
1059 *sf = val;
1060 }
1061 } else {
1062 int i;
1063 /** run level decode differences to the resampled factors */
1064 for (i = 0; i < s->num_bands; i++) {
1065 int idx;
1066 int skip;
1067 int val;
1068 int sign;
1069
1070 idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS, SCALERLMAXDEPTH);
1071
1072 if (!idx) {
1073 uint32_t code = get_bits(&s->gb, 14);
1074 val = code >> 6;
1075 sign = (code & 1) - 1;
1076 skip = (code & 0x3f) >> 1;
1077 } else if (idx == 1) {
1078 break;
1079 } else {
1080 skip = scale_rl_run[idx];
1081 val = scale_rl_level[idx];
1082 sign = get_bits1(&s->gb)-1;
1083 }
1084
1085 i += skip;
1086 if (i >= s->num_bands) {
1087 av_log(s->avctx, AV_LOG_ERROR,
1088 "invalid scale factor coding\n");
1089 return AVERROR_INVALIDDATA;
1090 }
1091 s->channel[c].scale_factors[i] += (val ^ sign) - sign;
1092 }
1093 }
1094 /** swap buffers */
1095 s->channel[c].scale_factor_idx = !s->channel[c].scale_factor_idx;
1096 s->channel[c].table_idx = s->table_idx;
1097 s->channel[c].reuse_sf = 1;
1098 }
1099
1100 /** calculate new scale factor maximum */
1101 s->channel[c].max_scale_factor = s->channel[c].scale_factors[0];
1102 for (sf = s->channel[c].scale_factors + 1; sf < sf_end; sf++) {
1103 s->channel[c].max_scale_factor =
1104 FFMAX(s->channel[c].max_scale_factor, *sf);
1105 }
1106
1107 }
1108 return 0;
1109 }
1110
1111 /**
1112 *@brief Reconstruct the individual channel data.
1113 *@param s codec context
1114 */
inverse_channel_transform(WMAProDecodeCtx * s)1115 static void inverse_channel_transform(WMAProDecodeCtx *s)
1116 {
1117 int i;
1118
1119 for (i = 0; i < s->num_chgroups; i++) {
1120 if (s->chgroup[i].transform) {
1121 float data[WMAPRO_MAX_CHANNELS];
1122 const int num_channels = s->chgroup[i].num_channels;
1123 float** ch_data = s->chgroup[i].channel_data;
1124 float** ch_end = ch_data + num_channels;
1125 const int8_t* tb = s->chgroup[i].transform_band;
1126 int16_t* sfb;
1127
1128 /** multichannel decorrelation */
1129 for (sfb = s->cur_sfb_offsets;
1130 sfb < s->cur_sfb_offsets + s->num_bands; sfb++) {
1131 int y;
1132 if (*tb++ == 1) {
1133 /** multiply values with the decorrelation_matrix */
1134 for (y = sfb[0]; y < FFMIN(sfb[1], s->subframe_len); y++) {
1135 const float* mat = s->chgroup[i].decorrelation_matrix;
1136 const float* data_end = data + num_channels;
1137 float* data_ptr = data;
1138 float** ch;
1139
1140 for (ch = ch_data; ch < ch_end; ch++)
1141 *data_ptr++ = (*ch)[y];
1142
1143 for (ch = ch_data; ch < ch_end; ch++) {
1144 float sum = 0;
1145 data_ptr = data;
1146 while (data_ptr < data_end)
1147 sum += *data_ptr++ * *mat++;
1148
1149 (*ch)[y] = sum;
1150 }
1151 }
1152 } else if (s->nb_channels == 2) {
1153 int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
1154 s->fdsp->vector_fmul_scalar(ch_data[0] + sfb[0],
1155 ch_data[0] + sfb[0],
1156 181.0 / 128, len);
1157 s->fdsp->vector_fmul_scalar(ch_data[1] + sfb[0],
1158 ch_data[1] + sfb[0],
1159 181.0 / 128, len);
1160 }
1161 }
1162 }
1163 }
1164 }
1165
1166 /**
1167 *@brief Apply sine window and reconstruct the output buffer.
1168 *@param s codec context
1169 */
wmapro_window(WMAProDecodeCtx * s)1170 static void wmapro_window(WMAProDecodeCtx *s)
1171 {
1172 int i;
1173 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1174 int c = s->channel_indexes_for_cur_subframe[i];
1175 const float* window;
1176 int winlen = s->channel[c].prev_block_len;
1177 float* start = s->channel[c].coeffs - (winlen >> 1);
1178
1179 if (s->subframe_len < winlen) {
1180 start += (winlen - s->subframe_len) >> 1;
1181 winlen = s->subframe_len;
1182 }
1183
1184 window = s->windows[av_log2(winlen) - WMAPRO_BLOCK_MIN_BITS];
1185
1186 winlen >>= 1;
1187
1188 s->fdsp->vector_fmul_window(start, start, start + winlen,
1189 window, winlen);
1190
1191 s->channel[c].prev_block_len = s->subframe_len;
1192 }
1193 }
1194
1195 /**
1196 *@brief Decode a single subframe (block).
1197 *@param s codec context
1198 *@return 0 on success, < 0 when decoding failed
1199 */
decode_subframe(WMAProDecodeCtx * s)1200 static int decode_subframe(WMAProDecodeCtx *s)
1201 {
1202 int offset = s->samples_per_frame;
1203 int subframe_len = s->samples_per_frame;
1204 int i;
1205 int total_samples = s->samples_per_frame * s->nb_channels;
1206 int transmit_coeffs = 0;
1207 int cur_subwoofer_cutoff;
1208
1209 s->subframe_offset = get_bits_count(&s->gb);
1210
1211 /** reset channel context and find the next block offset and size
1212 == the next block of the channel with the smallest number of
1213 decoded samples
1214 */
1215 for (i = 0; i < s->nb_channels; i++) {
1216 s->channel[i].grouped = 0;
1217 if (offset > s->channel[i].decoded_samples) {
1218 offset = s->channel[i].decoded_samples;
1219 subframe_len =
1220 s->channel[i].subframe_len[s->channel[i].cur_subframe];
1221 }
1222 }
1223
1224 ff_dlog(s->avctx,
1225 "processing subframe with offset %i len %i\n", offset, subframe_len);
1226
1227 /** get a list of all channels that contain the estimated block */
1228 s->channels_for_cur_subframe = 0;
1229 for (i = 0; i < s->nb_channels; i++) {
1230 const int cur_subframe = s->channel[i].cur_subframe;
1231 /** subtract already processed samples */
1232 total_samples -= s->channel[i].decoded_samples;
1233
1234 /** and count if there are multiple subframes that match our profile */
1235 if (offset == s->channel[i].decoded_samples &&
1236 subframe_len == s->channel[i].subframe_len[cur_subframe]) {
1237 total_samples -= s->channel[i].subframe_len[cur_subframe];
1238 s->channel[i].decoded_samples +=
1239 s->channel[i].subframe_len[cur_subframe];
1240 s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
1241 ++s->channels_for_cur_subframe;
1242 }
1243 }
1244
1245 /** check if the frame will be complete after processing the
1246 estimated block */
1247 if (!total_samples)
1248 s->parsed_all_subframes = 1;
1249
1250
1251 ff_dlog(s->avctx, "subframe is part of %i channels\n",
1252 s->channels_for_cur_subframe);
1253
1254 /** calculate number of scale factor bands and their offsets */
1255 s->table_idx = av_log2(s->samples_per_frame/subframe_len);
1256 s->num_bands = s->num_sfb[s->table_idx];
1257 s->cur_sfb_offsets = s->sfb_offsets[s->table_idx];
1258 cur_subwoofer_cutoff = s->subwoofer_cutoffs[s->table_idx];
1259
1260 /** configure the decoder for the current subframe */
1261 offset += s->samples_per_frame >> 1;
1262
1263 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1264 int c = s->channel_indexes_for_cur_subframe[i];
1265
1266 s->channel[c].coeffs = &s->channel[c].out[offset];
1267 }
1268
1269 s->subframe_len = subframe_len;
1270 s->esc_len = av_log2(s->subframe_len - 1) + 1;
1271
1272 /** skip extended header if any */
1273 if (get_bits1(&s->gb)) {
1274 int num_fill_bits;
1275 if (!(num_fill_bits = get_bits(&s->gb, 2))) {
1276 int len = get_bits(&s->gb, 4);
1277 num_fill_bits = get_bitsz(&s->gb, len) + 1;
1278 }
1279
1280 if (num_fill_bits >= 0) {
1281 if (get_bits_count(&s->gb) + num_fill_bits > s->num_saved_bits) {
1282 av_log(s->avctx, AV_LOG_ERROR, "invalid number of fill bits\n");
1283 return AVERROR_INVALIDDATA;
1284 }
1285
1286 skip_bits_long(&s->gb, num_fill_bits);
1287 }
1288 }
1289
1290 /** no idea for what the following bit is used */
1291 if (get_bits1(&s->gb)) {
1292 avpriv_request_sample(s->avctx, "Reserved bit");
1293 return AVERROR_PATCHWELCOME;
1294 }
1295
1296
1297 if (decode_channel_transform(s) < 0)
1298 return AVERROR_INVALIDDATA;
1299
1300
1301 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1302 int c = s->channel_indexes_for_cur_subframe[i];
1303 if ((s->channel[c].transmit_coefs = get_bits1(&s->gb)))
1304 transmit_coeffs = 1;
1305 }
1306
1307 av_assert0(s->subframe_len <= WMAPRO_BLOCK_MAX_SIZE);
1308 if (transmit_coeffs) {
1309 int step;
1310 int quant_step = 90 * s->bits_per_sample >> 4;
1311
1312 /** decode number of vector coded coefficients */
1313 if ((s->transmit_num_vec_coeffs = get_bits1(&s->gb))) {
1314 int num_bits = av_log2((s->subframe_len + 3)/4) + 1;
1315 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1316 int c = s->channel_indexes_for_cur_subframe[i];
1317 int num_vec_coeffs = get_bits(&s->gb, num_bits) << 2;
1318 if (num_vec_coeffs > s->subframe_len) {
1319 av_log(s->avctx, AV_LOG_ERROR, "num_vec_coeffs %d is too large\n", num_vec_coeffs);
1320 return AVERROR_INVALIDDATA;
1321 }
1322 av_assert0(num_vec_coeffs + offset <= FF_ARRAY_ELEMS(s->channel[c].out));
1323 s->channel[c].num_vec_coeffs = num_vec_coeffs;
1324 }
1325 } else {
1326 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1327 int c = s->channel_indexes_for_cur_subframe[i];
1328 s->channel[c].num_vec_coeffs = s->subframe_len;
1329 }
1330 }
1331 /** decode quantization step */
1332 step = get_sbits(&s->gb, 6);
1333 quant_step += step;
1334 if (step == -32 || step == 31) {
1335 const int sign = (step == 31) - 1;
1336 int quant = 0;
1337 while (get_bits_count(&s->gb) + 5 < s->num_saved_bits &&
1338 (step = get_bits(&s->gb, 5)) == 31) {
1339 quant += 31;
1340 }
1341 quant_step += ((quant + step) ^ sign) - sign;
1342 }
1343 if (quant_step < 0) {
1344 av_log(s->avctx, AV_LOG_DEBUG, "negative quant step\n");
1345 }
1346
1347 /** decode quantization step modifiers for every channel */
1348
1349 if (s->channels_for_cur_subframe == 1) {
1350 s->channel[s->channel_indexes_for_cur_subframe[0]].quant_step = quant_step;
1351 } else {
1352 int modifier_len = get_bits(&s->gb, 3);
1353 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1354 int c = s->channel_indexes_for_cur_subframe[i];
1355 s->channel[c].quant_step = quant_step;
1356 if (get_bits1(&s->gb)) {
1357 if (modifier_len) {
1358 s->channel[c].quant_step += get_bits(&s->gb, modifier_len) + 1;
1359 } else
1360 ++s->channel[c].quant_step;
1361 }
1362 }
1363 }
1364
1365 /** decode scale factors */
1366 if (decode_scale_factors(s) < 0)
1367 return AVERROR_INVALIDDATA;
1368 }
1369
1370 ff_dlog(s->avctx, "BITSTREAM: subframe header length was %i\n",
1371 get_bits_count(&s->gb) - s->subframe_offset);
1372
1373 /** parse coefficients */
1374 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1375 int c = s->channel_indexes_for_cur_subframe[i];
1376 if (s->channel[c].transmit_coefs &&
1377 get_bits_count(&s->gb) < s->num_saved_bits) {
1378 decode_coeffs(s, c);
1379 } else
1380 memset(s->channel[c].coeffs, 0,
1381 sizeof(*s->channel[c].coeffs) * subframe_len);
1382 }
1383
1384 ff_dlog(s->avctx, "BITSTREAM: subframe length was %i\n",
1385 get_bits_count(&s->gb) - s->subframe_offset);
1386
1387 if (transmit_coeffs) {
1388 FFTContext *mdct = &s->mdct_ctx[av_log2(subframe_len) - WMAPRO_BLOCK_MIN_BITS];
1389 /** reconstruct the per channel data */
1390 inverse_channel_transform(s);
1391 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1392 int c = s->channel_indexes_for_cur_subframe[i];
1393 const int* sf = s->channel[c].scale_factors;
1394 int b;
1395
1396 if (c == s->lfe_channel)
1397 memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
1398 (subframe_len - cur_subwoofer_cutoff));
1399
1400 /** inverse quantization and rescaling */
1401 for (b = 0; b < s->num_bands; b++) {
1402 const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
1403 const int exp = s->channel[c].quant_step -
1404 (s->channel[c].max_scale_factor - *sf++) *
1405 s->channel[c].scale_factor_step;
1406 const float quant = ff_exp10(exp / 20.0);
1407 int start = s->cur_sfb_offsets[b];
1408 s->fdsp->vector_fmul_scalar(s->tmp + start,
1409 s->channel[c].coeffs + start,
1410 quant, end - start);
1411 }
1412
1413 /** apply imdct (imdct_half == DCTIV with reverse) */
1414 mdct->imdct_half(mdct, s->channel[c].coeffs, s->tmp);
1415 }
1416 }
1417
1418 /** window and overlapp-add */
1419 wmapro_window(s);
1420
1421 /** handled one subframe */
1422 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1423 int c = s->channel_indexes_for_cur_subframe[i];
1424 if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1425 av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
1426 return AVERROR_INVALIDDATA;
1427 }
1428 ++s->channel[c].cur_subframe;
1429 }
1430
1431 return 0;
1432 }
1433
1434 /**
1435 *@brief Decode one WMA frame.
1436 *@param s codec context
1437 *@return 0 if the trailer bit indicates that this is the last frame,
1438 * 1 if there are additional frames
1439 */
decode_frame(WMAProDecodeCtx * s,AVFrame * frame,int * got_frame_ptr)1440 static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr)
1441 {
1442 GetBitContext* gb = &s->gb;
1443 int more_frames = 0;
1444 int len = 0;
1445 int i;
1446
1447 /** get frame length */
1448 if (s->len_prefix)
1449 len = get_bits(gb, s->log2_frame_size);
1450
1451 ff_dlog(s->avctx, "decoding frame with length %x\n", len);
1452
1453 /** decode tile information */
1454 if (decode_tilehdr(s)) {
1455 s->packet_loss = 1;
1456 return 0;
1457 }
1458
1459 /** read postproc transform */
1460 if (s->nb_channels > 1 && get_bits1(gb)) {
1461 if (get_bits1(gb)) {
1462 for (i = 0; i < s->nb_channels * s->nb_channels; i++)
1463 skip_bits(gb, 4);
1464 }
1465 }
1466
1467 /** read drc info */
1468 if (s->dynamic_range_compression) {
1469 s->drc_gain = get_bits(gb, 8);
1470 ff_dlog(s->avctx, "drc_gain %i\n", s->drc_gain);
1471 }
1472
1473 if (get_bits1(gb)) {
1474 if (get_bits1(gb))
1475 s->trim_start = get_bits(gb, av_log2(s->samples_per_frame * 2));
1476
1477 if (get_bits1(gb))
1478 s->trim_end = get_bits(gb, av_log2(s->samples_per_frame * 2));
1479 } else {
1480 s->trim_start = s->trim_end = 0;
1481 }
1482
1483 ff_dlog(s->avctx, "BITSTREAM: frame header length was %i\n",
1484 get_bits_count(gb) - s->frame_offset);
1485
1486 /** reset subframe states */
1487 s->parsed_all_subframes = 0;
1488 for (i = 0; i < s->nb_channels; i++) {
1489 s->channel[i].decoded_samples = 0;
1490 s->channel[i].cur_subframe = 0;
1491 s->channel[i].reuse_sf = 0;
1492 }
1493
1494 /** decode all subframes */
1495 while (!s->parsed_all_subframes) {
1496 if (decode_subframe(s) < 0) {
1497 s->packet_loss = 1;
1498 return 0;
1499 }
1500 }
1501
1502 /** copy samples to the output buffer */
1503 for (i = 0; i < s->nb_channels; i++)
1504 memcpy(frame->extended_data[i], s->channel[i].out,
1505 s->samples_per_frame * sizeof(*s->channel[i].out));
1506
1507 for (i = 0; i < s->nb_channels; i++) {
1508 /** reuse second half of the IMDCT output for the next frame */
1509 memcpy(&s->channel[i].out[0],
1510 &s->channel[i].out[s->samples_per_frame],
1511 s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
1512 }
1513
1514 if (s->skip_frame) {
1515 s->skip_frame = 0;
1516 *got_frame_ptr = 0;
1517 av_frame_unref(frame);
1518 } else {
1519 *got_frame_ptr = 1;
1520 }
1521
1522 if (s->len_prefix) {
1523 if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1524 /** FIXME: not sure if this is always an error */
1525 av_log(s->avctx, AV_LOG_ERROR,
1526 "frame[%"PRIu32"] would have to skip %i bits\n",
1527 s->frame_num,
1528 len - (get_bits_count(gb) - s->frame_offset) - 1);
1529 s->packet_loss = 1;
1530 return 0;
1531 }
1532
1533 /** skip the rest of the frame data */
1534 skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1535 } else {
1536 while (get_bits_count(gb) < s->num_saved_bits && get_bits1(gb) == 0) {
1537 }
1538 }
1539
1540 /** decode trailer bit */
1541 more_frames = get_bits1(gb);
1542
1543 ++s->frame_num;
1544 return more_frames;
1545 }
1546
1547 /**
1548 *@brief Calculate remaining input buffer length.
1549 *@param s codec context
1550 *@param gb bitstream reader context
1551 *@return remaining size in bits
1552 */
remaining_bits(WMAProDecodeCtx * s,GetBitContext * gb)1553 static int remaining_bits(WMAProDecodeCtx *s, GetBitContext *gb)
1554 {
1555 return s->buf_bit_size - get_bits_count(gb);
1556 }
1557
1558 /**
1559 *@brief Fill the bit reservoir with a (partial) frame.
1560 *@param s codec context
1561 *@param gb bitstream reader context
1562 *@param len length of the partial frame
1563 *@param append decides whether to reset the buffer or not
1564 */
save_bits(WMAProDecodeCtx * s,GetBitContext * gb,int len,int append)1565 static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
1566 int append)
1567 {
1568 int buflen;
1569
1570 /** when the frame data does not need to be concatenated, the input buffer
1571 is reset and additional bits from the previous frame are copied
1572 and skipped later so that a fast byte copy is possible */
1573
1574 if (!append) {
1575 s->frame_offset = get_bits_count(gb) & 7;
1576 s->num_saved_bits = s->frame_offset;
1577 init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
1578 buflen = (s->num_saved_bits + len + 7) >> 3;
1579 } else
1580 buflen = (put_bits_count(&s->pb) + len + 7) >> 3;
1581
1582 if (len <= 0 || buflen > MAX_FRAMESIZE) {
1583 avpriv_request_sample(s->avctx, "Too small input buffer");
1584 s->packet_loss = 1;
1585 return;
1586 }
1587
1588 av_assert0(len <= put_bits_left(&s->pb));
1589
1590 s->num_saved_bits += len;
1591 if (!append) {
1592 ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
1593 s->num_saved_bits);
1594 } else {
1595 int align = 8 - (get_bits_count(gb) & 7);
1596 align = FFMIN(align, len);
1597 put_bits(&s->pb, align, get_bits(gb, align));
1598 len -= align;
1599 ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
1600 }
1601 skip_bits_long(gb, len);
1602
1603 {
1604 PutBitContext tmp = s->pb;
1605 flush_put_bits(&tmp);
1606 }
1607
1608 init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1609 skip_bits(&s->gb, s->frame_offset);
1610 }
1611
/**
 *@brief Decode the next part of a packet.
 *       An empty packet flushes the remaining samples once. Otherwise
 *       either a new packet header is parsed or the next frame of the
 *       current packet is decoded.
 *@param avctx codec context
 *@param s decoder context
 *@param frame output frame
 *@param got_frame_ptr set to 1 when frame contains decoded samples
 *@param avpkt input packet
 *@return number of bytes read from the current bitstream reader,
 *        0 when flushing, < 0 on errors
 */
static int decode_packet(AVCodecContext *avctx, WMAProDecodeCtx *s,
                         AVFrame *frame, int *got_frame_ptr, AVPacket *avpkt)
{
    GetBitContext *gb   = &s->pgb;
    const uint8_t *buf  = avpkt->data;
    int buf_size        = avpkt->size;
    const int is_wmapro = avctx->codec_id == AV_CODEC_ID_WMAPRO;
    int num_bits_prev_frame;
    int packet_sequence_number;
    int ret;

    *got_frame_ptr = 0;

    if (!buf_size) {
        int i;

        /** Must output remaining samples after stream end. WMAPRO 5.1 created
         *  by XWMA encoder don't though (maybe only 1/2ch streams need it). */
        s->packet_done = 0;
        if (s->eof_done)
            return 0;

        /** clean output buffer and copy last IMDCT samples */
        for (i = 0; i < s->nb_channels; i++) {
            memset(frame->extended_data[i], 0,
                   s->samples_per_frame * sizeof(*s->channel[i].out));

            memcpy(frame->extended_data[i], s->channel[i].out,
                   s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
        }

        s->eof_done    = 1;
        s->packet_done = 1;
        *got_frame_ptr = 1;
        return 0;
    }

    if (s->packet_done || s->packet_loss) {
        int packet_len;

        s->packet_done = 0;

        /** sanity check for the buffer length */
        if (is_wmapro && buf_size < avctx->block_align) {
            av_log(avctx, AV_LOG_ERROR, "Input packet too small (%d < %d)\n",
                   buf_size, avctx->block_align);
            s->packet_loss = 1;
            return AVERROR_INVALIDDATA;
        }

        /** only block_align bytes belong to this packet; for the other
            codecs the input may be shorter than that */
        packet_len = is_wmapro ? avctx->block_align
                               : FFMIN(buf_size, avctx->block_align);
        s->next_packet_start = buf_size - packet_len;
        buf_size             = packet_len;
        s->buf_bit_size      = buf_size << 3;

        /** parse packet header */
        ret = init_get_bits8(gb, buf, buf_size);
        if (ret < 0)
            return ret;
        if (avctx->codec_id == AV_CODEC_ID_XMA2) {
            int num_frames = get_bits(gb, 6);
            ff_dlog(avctx, "packet[%d]: number of frames %d\n", avctx->frame_number, num_frames);
            packet_sequence_number = 0;
        } else {
            packet_sequence_number = get_bits(gb, 4);
            skip_bits(gb, 2);
        }

        /** get number of bits that need to be added to the previous frame */
        num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
        if (!is_wmapro) {
            skip_bits(gb, 3);
            s->skip_packets = get_bits(gb, 8);
            ff_dlog(avctx, "packet[%d]: skip packets %d\n", avctx->frame_number, s->skip_packets);
        }

        ff_dlog(avctx, "packet[%d]: nbpf %x\n", avctx->frame_number,
                num_bits_prev_frame);

        /** check for packet loss */
        if (is_wmapro && !s->packet_loss &&
            ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
            s->packet_loss = 1;
            av_log(avctx, AV_LOG_ERROR,
                   "Packet loss detected! seq %"PRIx8" vs %x\n",
                   s->packet_sequence_number, packet_sequence_number);
        }
        s->packet_sequence_number = packet_sequence_number;

        if (num_bits_prev_frame > 0) {
            const int remaining_packet_bits = s->buf_bit_size - get_bits_count(gb);

            /** the previous frame takes up the whole rest of the packet */
            if (num_bits_prev_frame >= remaining_packet_bits) {
                num_bits_prev_frame = remaining_packet_bits;
                s->packet_done      = 1;
            }

            /** append the previous frame data to the remaining data from the
                previous packet to create a full frame */
            save_bits(s, gb, num_bits_prev_frame, 1);
            ff_dlog(avctx, "accumulated %x bits of frame data\n",
                    s->num_saved_bits - s->frame_offset);

            /** decode the cross packet frame if it is valid */
            if (!s->packet_loss)
                decode_frame(s, frame, got_frame_ptr);
        } else if (s->num_saved_bits - s->frame_offset) {
            ff_dlog(avctx, "ignoring %x previously saved bits\n",
                    s->num_saved_bits - s->frame_offset);
        }

        if (s->packet_loss) {
            /** reset number of saved bits so that the decoder
                does not start to decode incomplete frames in the
                s->len_prefix == 0 case */
            s->num_saved_bits = 0;
            s->packet_loss    = 0;
        }
    } else {
        int frame_size;
        int payload_size;

        if (buf_size < s->next_packet_start) {
            s->packet_loss = 1;
            return AVERROR_INVALIDDATA;
        }

        /** continue in the current packet at the saved bit offset */
        payload_size    = buf_size - s->next_packet_start;
        s->buf_bit_size = payload_size << 3;
        ret = init_get_bits8(gb, buf, payload_size);
        if (ret < 0)
            return ret;
        skip_bits(gb, s->packet_offset);

        if (s->len_prefix && remaining_bits(s, gb) > s->log2_frame_size &&
            (frame_size = show_bits(gb, s->log2_frame_size)) &&
            frame_size <= remaining_bits(s, gb)) {
            /** the complete frame is part of this packet */
            save_bits(s, gb, frame_size, 0);
            if (!s->packet_loss)
                s->packet_done = !decode_frame(s, frame, got_frame_ptr);
        } else if (!s->len_prefix
                   && s->num_saved_bits > get_bits_count(&s->gb)) {
            /** when the frames do not have a length prefix, we don't know
                the compressed length of the individual frames
                however, we know what part of a new packet belongs to the
                previous frame
                therefore we save the incoming packet first, then we append
                the "previous frame" data from the next packet so that
                we get a buffer that only contains full frames */
            s->packet_done = !decode_frame(s, frame, got_frame_ptr);
        } else {
            s->packet_done = 1;
        }
    }

    if (remaining_bits(s, gb) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Overread %d\n", -remaining_bits(s, gb));
        s->packet_loss = 1;
    }

    if (s->packet_done && !s->packet_loss &&
        remaining_bits(s, gb) > 0) {
        /** save the rest of the data so that it can be decoded
            with the next packet */
        save_bits(s, gb, remaining_bits(s, gb), 0);
    }

    s->packet_offset = get_bits_count(gb) & 7;
    if (s->packet_loss)
        return AVERROR_INVALIDDATA;

    /** drop trimmed samples from the start of the frame; the data
        pointers are advanced by 4 bytes per trimmed sample */
    if (s->trim_start && is_wmapro) {
        if (s->trim_start >= frame->nb_samples) {
            *got_frame_ptr = 0;
        } else {
            for (int ch = 0; ch < frame->ch_layout.nb_channels; ch++)
                frame->extended_data[ch] += s->trim_start * 4;

            frame->nb_samples -= s->trim_start;
        }

        s->trim_start = 0;
    }

    /** drop trimmed samples from the end of the frame */
    if (s->trim_end && is_wmapro) {
        if (s->trim_end >= frame->nb_samples)
            *got_frame_ptr = 0;
        else
            frame->nb_samples -= s->trim_end;

        s->trim_end = 0;
    }

    return get_bits_count(gb) >> 3;
}
1804
1805 /**
1806 *@brief Decode a single WMA packet.
1807 *@param avctx codec context
1808 *@param data the output buffer
1809 *@param avpkt input packet
1810 *@return number of bytes that were read from the input buffer
1811 */
wmapro_decode_packet(AVCodecContext * avctx,AVFrame * frame,int * got_frame_ptr,AVPacket * avpkt)1812 static int wmapro_decode_packet(AVCodecContext *avctx, AVFrame *frame,
1813 int *got_frame_ptr, AVPacket *avpkt)
1814 {
1815 WMAProDecodeCtx *s = avctx->priv_data;
1816 int ret;
1817
1818 /* get output buffer */
1819 frame->nb_samples = s->samples_per_frame;
1820 if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
1821 s->packet_loss = 1;
1822 return 0;
1823 }
1824
1825 return decode_packet(avctx, s, frame, got_frame_ptr, avpkt);
1826 }
1827
/**
 *@brief Decode one XMA packet and output buffered samples when possible.
 *
 * The packet is decoded by the stream selected in current_stream into that
 * stream's private 512-sample frame. Decoded samples are appended to the
 * per-stream FIFOs; once the current stream is done with its packet, the next
 * owner stream is chosen and the samples available in all streams are copied
 * to the output frame.
 *
 *@param avctx         codec context
 *@param frame         output frame, filled from the per-stream FIFOs
 *@param got_frame_ptr written only when an output frame is produced; set to
 *                     non-zero if that frame holds at least one sample
 *@param avpkt         input packet; a size of 0 drains every stream
 *@return the result of the last decode_packet() call (0 if none was made),
 *        or a negative error code
 */
static int xma_decode_packet(AVCodecContext *avctx, AVFrame *frame,
                             int *got_frame_ptr, AVPacket *avpkt)
{
    XMADecodeCtx *s = avctx->priv_data;
    AVFrame *stream_frame = s->frames[s->current_stream];
    int got_stream_frame_ptr = 0;
    int i, ret = 0, eof = 0;

    /* make sure the current stream owns a 512-sample buffer; a buffer of any
       other size is dropped and requested again */
    if (!stream_frame->data[0] || stream_frame->nb_samples != 512) {
        avctx->internal->skip_samples = 64;
        if (stream_frame->data[0])
            av_frame_unref(stream_frame);
        stream_frame->nb_samples = 512;
        if ((ret = ff_get_buffer(avctx, stream_frame, 0)) < 0)
            return ret;
    }

    /* decode current stream packet */
    if (!s->xma[s->current_stream].eof_done) {
        ret = decode_packet(avctx, &s->xma[s->current_stream], stream_frame,
                            &got_stream_frame_ptr, avpkt);
    }

    /* an empty packet signals draining: give every unfinished stream that
       owns a buffer another decode_packet() call; eof stays set only if all
       streams report eof_done */
    if (!avpkt->size) {
        eof = 1;

        for (i = 0; i < s->num_streams; i++) {
            if (!s->xma[i].eof_done && s->frames[i]->data[0]) {
                ret = decode_packet(avctx, &s->xma[i], s->frames[i],
                                    &got_stream_frame_ptr, avpkt);
            }

            eof &= s->xma[i].eof_done;
        }
    }

    /* the trim values of the first stream are used for the whole output */
    if (s->xma[0].trim_start)
        s->trim_start = s->xma[0].trim_start;
    if (s->xma[0].trim_end)
        s->trim_end = s->xma[0].trim_end;

    /* copy stream samples (1/2ch) to sample buffer (Nch) */
    if (got_stream_frame_ptr) {
        AVFrame *src = s->frames[s->current_stream];
        const int nb_samples = src->nb_samples;
        void *left[1]  = { src->extended_data[0] };
        void *right[1] = { src->extended_data[1] };
        int wret;

        /* av_audio_fifo_write() may have to grow the FIFO and can fail;
           do not silently lose the decoded samples in that case */
        wret = av_audio_fifo_write(s->samples[0][s->current_stream], left, nb_samples);
        if (wret < 0)
            return wret;
        if (s->xma[s->current_stream].nb_channels > 1) {
            wret = av_audio_fifo_write(s->samples[1][s->current_stream], right, nb_samples);
            if (wret < 0)
                return wret;
        }
    } else if (ret < 0) {
        s->current_stream = 0;
        return ret;
    }

    /* find next XMA packet's owner stream, and update.
     * XMA streams find their packets following packet_skips
     * (at start there is one packet per stream, then interleave non-linearly). */
    if (s->xma[s->current_stream].packet_done ||
        s->xma[s->current_stream].packet_loss) {
        int nb_samples = INT_MAX;

        /* select stream with 0 skip_packets (= uses next packet) */
        if (s->xma[s->current_stream].skip_packets != 0) {
            int min[2];

            /* min[0]: smallest skip_packets seen, min[1]: its stream index */
            min[0] = s->xma[0].skip_packets;
            min[1] = i = 0;

            for (i = 1; i < s->num_streams; i++) {
                if (s->xma[i].skip_packets < min[0]) {
                    min[0] = s->xma[i].skip_packets;
                    min[1] = i;
                }
            }

            s->current_stream = min[1];
        }

        /* all other streams skip next packet; also find how many samples
           every stream can deliver (first-channel FIFO fill level) */
        for (i = 0; i < s->num_streams; i++) {
            s->xma[i].skip_packets = FFMAX(0, s->xma[i].skip_packets - 1);
            nb_samples = FFMIN(nb_samples, av_audio_fifo_size(s->samples[0][i]));
        }

        /* while more input is expected, up to 4096 samples stay queued
           NOTE(review): presumably a margin for streams lagging behind -
           confirm */
        if (!eof && avpkt->size)
            nb_samples -= FFMIN(nb_samples, 4096);

        /* copy samples from buffer to output if possible */
        if ((nb_samples > 0 || eof || !avpkt->size) && !s->flushed) {
            int bret;

            if (eof) {
                /* final output: drop the trimmed tail and never output again
                   until the decoder is flushed */
                nb_samples -= av_clip(s->trim_end + s->trim_start - 128 - 64, 0, nb_samples);
                s->flushed = 1;
            }

            frame->nb_samples = nb_samples;
            if ((bret = ff_get_buffer(avctx, frame, 0)) < 0)
                return bret;

            /* each stream fills its own 1 or 2 channels of the output */
            for (i = 0; i < s->num_streams; i++) {
                const int start_ch = s->start_channel[i];
                void *left[1] = { frame->extended_data[start_ch + 0] };

                av_audio_fifo_read(s->samples[0][i], left, nb_samples);
                if (s->xma[i].nb_channels > 1) {
                    void *right[1] = { frame->extended_data[start_ch + 1] };
                    av_audio_fifo_read(s->samples[1][i], right, nb_samples);
                }
            }

            *got_frame_ptr = nb_samples > 0;
        }
    }

    return ret;
}
1948
xma_decode_init(AVCodecContext * avctx)1949 static av_cold int xma_decode_init(AVCodecContext *avctx)
1950 {
1951 XMADecodeCtx *s = avctx->priv_data;
1952 int i, ret, start_channels = 0;
1953
1954 if (avctx->ch_layout.nb_channels <= 0 || avctx->extradata_size == 0)
1955 return AVERROR_INVALIDDATA;
1956
1957 /* get stream config */
1958 if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size == 34) { /* XMA2WAVEFORMATEX */
1959 unsigned int channel_mask = AV_RL32(avctx->extradata + 2);
1960 if (channel_mask) {
1961 av_channel_layout_uninit(&avctx->ch_layout);
1962 av_channel_layout_from_mask(&avctx->ch_layout, channel_mask);
1963 } else
1964 avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
1965 s->num_streams = AV_RL16(avctx->extradata);
1966 } else if (avctx->codec_id == AV_CODEC_ID_XMA2 && avctx->extradata_size >= 2) { /* XMA2WAVEFORMAT */
1967 s->num_streams = avctx->extradata[1];
1968 if (avctx->extradata_size != (32 + ((avctx->extradata[0]==3)?0:8) + 4*s->num_streams)) {
1969 av_log(avctx, AV_LOG_ERROR, "Incorrect XMA2 extradata size\n");
1970 s->num_streams = 0;
1971 return AVERROR(EINVAL);
1972 }
1973 } else if (avctx->codec_id == AV_CODEC_ID_XMA1 && avctx->extradata_size >= 4) { /* XMAWAVEFORMAT */
1974 s->num_streams = avctx->extradata[4];
1975 if (avctx->extradata_size != (8 + 20*s->num_streams)) {
1976 av_log(avctx, AV_LOG_ERROR, "Incorrect XMA1 extradata size\n");
1977 s->num_streams = 0;
1978 return AVERROR(EINVAL);
1979 }
1980 } else {
1981 av_log(avctx, AV_LOG_ERROR, "Incorrect XMA config\n");
1982 return AVERROR(EINVAL);
1983 }
1984
1985 /* encoder supports up to 64 streams / 64*2 channels (would have to alloc arrays) */
1986 if (avctx->ch_layout.nb_channels > XMA_MAX_CHANNELS || s->num_streams > XMA_MAX_STREAMS ||
1987 s->num_streams <= 0
1988 ) {
1989 avpriv_request_sample(avctx, "More than %d channels in %d streams", XMA_MAX_CHANNELS, s->num_streams);
1990 s->num_streams = 0;
1991 return AVERROR_PATCHWELCOME;
1992 }
1993
1994 /* init all streams (several streams of 1/2ch make Nch files) */
1995 for (i = 0; i < s->num_streams; i++) {
1996 ret = decode_init(&s->xma[i], avctx, i);
1997 if (ret < 0)
1998 return ret;
1999 s->frames[i] = av_frame_alloc();
2000 if (!s->frames[i])
2001 return AVERROR(ENOMEM);
2002
2003 s->start_channel[i] = start_channels;
2004 start_channels += s->xma[i].nb_channels;
2005 }
2006 if (start_channels != avctx->ch_layout.nb_channels)
2007 return AVERROR_INVALIDDATA;
2008
2009 for (int i = 0; i < XMA_MAX_STREAMS; i++) {
2010 s->samples[0][i] = av_audio_fifo_alloc(avctx->sample_fmt, 1, 64 * 512);
2011 s->samples[1][i] = av_audio_fifo_alloc(avctx->sample_fmt, 1, 64 * 512);
2012 if (!s->samples[0][i] || !s->samples[1][i])
2013 return AVERROR(ENOMEM);
2014 }
2015
2016 return ret;
2017 }
2018
xma_decode_end(AVCodecContext * avctx)2019 static av_cold int xma_decode_end(AVCodecContext *avctx)
2020 {
2021 XMADecodeCtx *s = avctx->priv_data;
2022 int i;
2023
2024 for (i = 0; i < s->num_streams; i++) {
2025 decode_end(&s->xma[i]);
2026 av_frame_free(&s->frames[i]);
2027 }
2028 s->num_streams = 0;
2029
2030 for (i = 0; i < XMA_MAX_STREAMS; i++) {
2031 av_audio_fifo_free(s->samples[0][i]);
2032 av_audio_fifo_free(s->samples[1][i]);
2033 }
2034
2035 return 0;
2036 }
2037
/**
 *@brief Reset the state of one WMAPro decoder context.
 *@param s decoder context
 */
static void flush(WMAProDecodeCtx *s)
{
    /** a part of the output buffer is reused while windowing a new frame,
        so it has to be wiped for every channel */
    for (int ch = 0; ch < s->nb_channels; ch++) {
        const size_t out_bytes = s->samples_per_frame *
                                 sizeof(*s->channel[ch].out);

        memset(s->channel[ch].out, 0, out_bytes);
    }

    s->skip_frame   = 1;
    s->eof_done     = 0;
    s->skip_packets = 0;
    s->packet_loss  = 1;
}
2051
2052 /**
2053 *@brief Clear decoder buffers (for seeking).
2054 *@param avctx codec context
2055 */
wmapro_flush(AVCodecContext * avctx)2056 static void wmapro_flush(AVCodecContext *avctx)
2057 {
2058 WMAProDecodeCtx *s = avctx->priv_data;
2059
2060 flush(s);
2061 }
2062
/**
 *@brief Clear the XMA decoder buffers and restart at the first stream.
 *@param avctx codec context
 */
static void xma_flush(AVCodecContext *avctx)
{
    XMADecodeCtx *ctx = avctx->priv_data;

    /* drop the queued samples of every FIFO, used or not */
    for (int stream = 0; stream < XMA_MAX_STREAMS; stream++) {
        for (int ch = 0; ch < 2; ch++)
            av_audio_fifo_reset(ctx->samples[ch][stream]);
    }

    /* reset the per-stream decoder state */
    for (int stream = 0; stream < ctx->num_streams; stream++)
        flush(&ctx->xma[stream]);

    ctx->flushed        = 0;
    ctx->current_stream = 0;
}
2079
2080 /**
2081 *@brief wmapro decoder
2082 */
2083 const FFCodec ff_wmapro_decoder = {
2084 .p.name = "wmapro",
2085 .p.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
2086 .p.type = AVMEDIA_TYPE_AUDIO,
2087 .p.id = AV_CODEC_ID_WMAPRO,
2088 .priv_data_size = sizeof(WMAProDecodeCtx),
2089 .init = wmapro_decode_init,
2090 .close = wmapro_decode_end,
2091 FF_CODEC_DECODE_CB(wmapro_decode_packet),
2092 .p.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
2093 .flush = wmapro_flush,
2094 .p.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
2095 AV_SAMPLE_FMT_NONE },
2096 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
2097 };
2098
2099 const FFCodec ff_xma1_decoder = {
2100 .p.name = "xma1",
2101 .p.long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 1"),
2102 .p.type = AVMEDIA_TYPE_AUDIO,
2103 .p.id = AV_CODEC_ID_XMA1,
2104 .priv_data_size = sizeof(XMADecodeCtx),
2105 .init = xma_decode_init,
2106 .close = xma_decode_end,
2107 FF_CODEC_DECODE_CB(xma_decode_packet),
2108 .p.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
2109 .p.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
2110 AV_SAMPLE_FMT_NONE },
2111 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
2112 };
2113
2114 const FFCodec ff_xma2_decoder = {
2115 .p.name = "xma2",
2116 .p.long_name = NULL_IF_CONFIG_SMALL("Xbox Media Audio 2"),
2117 .p.type = AVMEDIA_TYPE_AUDIO,
2118 .p.id = AV_CODEC_ID_XMA2,
2119 .priv_data_size = sizeof(XMADecodeCtx),
2120 .init = xma_decode_init,
2121 .close = xma_decode_end,
2122 FF_CODEC_DECODE_CB(xma_decode_packet),
2123 .flush = xma_flush,
2124 .p.capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
2125 .p.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
2126 AV_SAMPLE_FMT_NONE },
2127 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
2128 };
2129