1 /*
2 * WMA compatible encoder
3 * Copyright (c) 2007 Michael Niedermayer
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "libavutil/attributes.h"
23 #include "libavutil/ffmath.h"
24
25 #include "avcodec.h"
26 #include "internal.h"
27 #include "wma.h"
28 #include "libavutil/avassert.h"
29
30
encode_init(AVCodecContext * avctx)31 static av_cold int encode_init(AVCodecContext *avctx)
32 {
33 WMACodecContext *s = avctx->priv_data;
34 int i, flags1, flags2, block_align;
35 uint8_t *extradata;
36 int ret;
37
38 s->avctx = avctx;
39
40 if (avctx->channels > MAX_CHANNELS) {
41 av_log(avctx, AV_LOG_ERROR,
42 "too many channels: got %i, need %i or fewer\n",
43 avctx->channels, MAX_CHANNELS);
44 return AVERROR(EINVAL);
45 }
46
47 if (avctx->sample_rate > 48000) {
48 av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n",
49 avctx->sample_rate);
50 return AVERROR(EINVAL);
51 }
52
53 if (avctx->bit_rate < 24 * 1000) {
54 av_log(avctx, AV_LOG_ERROR,
55 "bitrate too low: got %"PRId64", need 24000 or higher\n",
56 avctx->bit_rate);
57 return AVERROR(EINVAL);
58 }
59
60 /* extract flag info */
61 flags1 = 0;
62 flags2 = 1;
63 if (avctx->codec->id == AV_CODEC_ID_WMAV1) {
64 extradata = av_malloc(4);
65 if (!extradata)
66 return AVERROR(ENOMEM);
67 avctx->extradata_size = 4;
68 AV_WL16(extradata, flags1);
69 AV_WL16(extradata + 2, flags2);
70 } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) {
71 extradata = av_mallocz(10);
72 if (!extradata)
73 return AVERROR(ENOMEM);
74 avctx->extradata_size = 10;
75 AV_WL32(extradata, flags1);
76 AV_WL16(extradata + 4, flags2);
77 } else {
78 av_assert0(0);
79 }
80 avctx->extradata = extradata;
81 s->use_exp_vlc = flags2 & 0x0001;
82 s->use_bit_reservoir = flags2 & 0x0002;
83 s->use_variable_block_len = flags2 & 0x0004;
84 if (avctx->channels == 2)
85 s->ms_stereo = 1;
86
87 if ((ret = ff_wma_init(avctx, flags2)) < 0)
88 return ret;
89
90 /* init MDCT */
91 for (i = 0; i < s->nb_block_sizes; i++)
92 ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0);
93
94 block_align = avctx->bit_rate * (int64_t) s->frame_len /
95 (avctx->sample_rate * 8);
96 block_align = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE);
97 avctx->block_align = block_align;
98 avctx->frame_size = avctx->initial_padding = s->frame_len;
99
100 return 0;
101 }
102
/**
 * Window the input samples of every channel and run the MDCT on them.
 *
 * For each channel the first half of s->output is taken from
 * s->frame_out[ch] (as left there by the previous call), the second half is
 * produced from the freshly scaled input, and s->frame_out[ch] is then
 * windowed in place. The transform result is stored in s->coefs[ch].
 *
 * @param avctx codec context whose priv_data is a WMACodecContext
 * @param frame input frame; extended_data is read as planar float
 * @return 0 on success, AVERROR(EINVAL) if the first coefficient of a
 *         channel is not finite
 */
static int apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame)
{
    WMACodecContext *s      = avctx->priv_data;
    const int bsize         = s->frame_len_bits - s->block_len_bits;
    const int block_samples = 1 << s->block_len_bits;
    const int nb_samples    = frame->nb_samples;
    const float gain        = 2.0 * 32768.0 / block_samples;
    const float *window     = s->windows[bsize];
    float **planes          = (float **) frame->extended_data;
    FFTContext *mdct        = &s->mdct_ctx[bsize];
    int ch;

    for (ch = 0; ch < avctx->channels; ch++) {
        float *history = s->frame_out[ch];

        /* first half of the transform input: data kept from the last call */
        memcpy(s->output, history, block_samples * sizeof(*s->output));

        /* scale the new samples, then build the second half and the
         * windowed copy that stays in frame_out[ch] */
        s->fdsp->vector_fmul_scalar(history, planes[ch], gain, nb_samples);
        s->fdsp->vector_fmul_reverse(&s->output[block_samples], history,
                                     window, nb_samples);
        s->fdsp->vector_fmul(history, history, window, nb_samples);

        mdct->mdct_calc(mdct, s->coefs[ch], s->output);
        if (!isfinite(s->coefs[ch][0])) {
            av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n");
            return AVERROR(EINVAL);
        }
    }

    return 0;
}
130
131 // FIXME use for decoding too
/**
 * Expand per-band exponent parameters into per-coefficient scale factors.
 *
 * Each entry of exp_param is converted to 10^(value / 16) and replicated
 * over the width of the corresponding exponent band of the current block
 * size. The largest scale factor seen is stored in s->max_exponent[ch].
 *
 * @param s         encoder context
 * @param ch        channel whose s->exponents[ch] row is filled
 * @param exp_param one exponent value per band
 */
static void init_exp(WMACodecContext *s, int ch, const int *exp_param)
{
    const uint16_t *band_len =
        s->exponent_bands[s->frame_len_bits - s->block_len_bits];
    float *dst       = s->exponents[ch];
    float *const end = dst + s->block_len;
    float peak       = 0;

    while (dst < end) {
        /* XXX: use a table */
        float scale   = ff_exp10(*exp_param++ *(1.0 / 16.0));
        int remaining = *band_len++;

        peak = FFMAX(peak, scale);
        do {
            *dst++ = scale;
        } while (--remaining);
    }

    s->max_exponent[ch] = peak;
}
153
/**
 * Write the band exponents of one channel to s->pb.
 *
 * For version 1 streams the first exponent is written as a 5-bit value
 * (exponent - 10). Every other exponent is written as a variable-length
 * code for its difference from the previous one; for other versions the
 * predictor starts at 36.
 *
 * @param s         encoder context
 * @param ch        channel index
 * @param exp_param one exponent value per band
 */
static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param)
{
    const uint16_t *band_len =
        s->exponent_bands[s->frame_len_bits - s->block_len_bits];
    float *pos       = s->exponents[ch];
    float *const end = pos + s->block_len;
    int last_exp     = 36;

    if (s->version == 1) {
        last_exp = *exp_param++;
        av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32);
        put_bits(&s->pb, 5, last_exp - 10);
        pos += *band_len++;
    }

    /* one delta code per remaining band; pos only tracks band coverage */
    for (; pos < end; pos += *band_len++) {
        const int cur  = *exp_param++;
        const int code = cur - last_exp + 60;

        av_assert1(code >= 0 && code < 120);
        put_bits(&s->pb, ff_aac_scalefactor_bits[code],
                 ff_aac_scalefactor_code[code]);
        last_exp = cur;
    }
}
181
/**
 * Quantize and write one block of MDCT coefficients to s->pb.
 *
 * Only fixed block lengths with VLC-coded exponents are implemented; the
 * other paths assert. The bits are written in this order:
 *  - the mid/side flag, when there are two channels,
 *  - one "channel coded" bit per channel,
 *  - total_gain - 1 in 7-bit chunks (a chunk of 127 for every full 127,
 *    then the remainder),
 *  - with noise coding enabled, one zero bit per high band,
 *  - the parse_exponents flag, only if the block is shorter than the frame,
 *  - the exponents, see encode_exp_vlc(),
 *  - the run/level coded coefficients of every coded channel.
 *
 * @param s          encoder context
 * @param src_coefs  MDCT coefficients, one row per channel
 * @param total_gain gain step; the quantizer divides by 10^(total_gain / 20)
 * @return 0 on success, 1 if no channel is coded, -1 if a coefficient
 *         cannot be represented with this total_gain
 */
static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                        int total_gain)
{
    int v, bsize, ch, coef_nb_bits, parse_exponents;
    float mdct_norm;
    int nb_coefs[MAX_CHANNELS];
    /* the encoder always uses the same flat exponent for every band */
    static const int fixed_exp[25] = {
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20
    };

    // FIXME remove duplication relative to decoder
    if (s->use_variable_block_len) {
        av_assert0(0); // FIXME not implemented
    } else {
        /* fixed block len */
        s->next_block_len_bits = s->frame_len_bits;
        s->prev_block_len_bits = s->frame_len_bits;
        s->block_len_bits      = s->frame_len_bits;
    }

    s->block_len = 1 << s->block_len_bits;
    // av_assert0((s->block_pos + s->block_len) <= s->frame_len);
    bsize = s->frame_len_bits - s->block_len_bits;

    // FIXME factor
    v = s->coefs_end[bsize] - s->coefs_start;
    for (ch = 0; ch < s->avctx->channels; ch++)
        nb_coefs[ch] = v;
    {
        int n4 = s->block_len / 2;
        mdct_norm = 1.0 / (float) n4;
        if (s->version == 1)
            mdct_norm *= sqrt(n4);
    }

    if (s->avctx->channels == 2)
        put_bits(&s->pb, 1, !!s->ms_stereo);

    for (ch = 0; ch < s->avctx->channels; ch++) {
        // FIXME only set channel_coded when needed, instead of always
        s->channel_coded[ch] = 1;
        if (s->channel_coded[ch])
            init_exp(s, ch, fixed_exp);
    }

    /* quantize: coefs1 = round(coef / (exponent * gain * mdct_norm)) */
    for (ch = 0; ch < s->avctx->channels; ch++) {
        if (s->channel_coded[ch]) {
            WMACoef *coefs1;
            float *coefs, *exponents, mult;
            int i, n;

            coefs1    = s->coefs1[ch];
            exponents = s->exponents[ch];
            mult      = ff_exp10(total_gain * 0.05) / s->max_exponent[ch];
            mult     *= mdct_norm;
            coefs     = src_coefs[ch];
            if (s->use_noise_coding && 0) {
                av_assert0(0); // FIXME not implemented
            } else {
                coefs += s->coefs_start;
                n      = nb_coefs[ch];
                for (i = 0; i < n; i++) {
                    double t = *coefs++ / (exponents[i] * mult);
                    /* written in the positive form so that NaN, for which
                     * every comparison is false, is rejected as well */
                    if (!(t >= -32768 && t <= 32767))
                        return -1;

                    coefs1[i] = lrint(t);
                }
            }
        }
    }

    v = 0;
    for (ch = 0; ch < s->avctx->channels; ch++) {
        int a = s->channel_coded[ch];
        put_bits(&s->pb, 1, a);
        v |= a;
    }

    if (!v)
        return 1;

    /* total_gain - 1 in 7-bit chunks: 127 for every full 127, then the rest */
    for (v = total_gain - 1; v >= 127; v -= 127)
        put_bits(&s->pb, 7, 127);
    put_bits(&s->pb, 7, v);

    coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);

    if (s->use_noise_coding) {
        for (ch = 0; ch < s->avctx->channels; ch++) {
            if (s->channel_coded[ch]) {
                int i, n;
                n = s->exponent_high_sizes[bsize];
                for (i = 0; i < n; i++) {
                    /* no high band is ever noise coded: always write 0 */
                    put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
                    if (0)
                        nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
                }
            }
        }
    }

    parse_exponents = 1;
    if (s->block_len_bits != s->frame_len_bits)
        put_bits(&s->pb, 1, parse_exponents);

    if (parse_exponents) {
        for (ch = 0; ch < s->avctx->channels; ch++) {
            if (s->channel_coded[ch]) {
                if (s->use_exp_vlc) {
                    encode_exp_vlc(s, ch, fixed_exp);
                } else {
                    av_assert0(0); // FIXME not implemented
                    // encode_exp_lsp(s, ch);
                }
            }
        }
    } else
        av_assert0(0); // FIXME not implemented

    /* run/level coding of the quantized coefficients */
    for (ch = 0; ch < s->avctx->channels; ch++) {
        if (s->channel_coded[ch]) {
            int run, tindex;
            WMACoef *ptr, *eptr;
            /* the second channel uses table 1 when mid/side is enabled */
            tindex = (ch == 1 && s->ms_stereo);
            ptr    = &s->coefs1[ch][0];
            eptr   = ptr + nb_coefs[ch];

            run = 0;
            for (; ptr < eptr; ptr++) {
                if (*ptr) {
                    int level     = *ptr;
                    int abs_level = FFABS(level);
                    int code      = 0;
                    /* code stays 0 (escape) unless the table covers this
                     * (run, level) pair */
                    if (abs_level <= s->coef_vlcs[tindex]->max_level)
                        if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
                            code = run + s->int_table[tindex][abs_level - 1];

                    av_assert2(code < s->coef_vlcs[tindex]->n);
                    put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
                             s->coef_vlcs[tindex]->huffcodes[code]);

                    if (code == 0) {
                        /* escape: explicit level and run; fail if the level
                         * does not fit in coef_nb_bits bits */
                        if (1 << coef_nb_bits <= abs_level)
                            return -1;

                        put_bits(&s->pb, coef_nb_bits, abs_level);
                        put_bits(&s->pb, s->frame_len_bits, run);
                    }
                    // FIXME the sign is flipped somewhere
                    put_bits(&s->pb, 1, level < 0);
                    run = 0;
                } else
                    run++;
            }
            /* trailing zeros are closed with code 1 */
            if (run)
                put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
                         s->coef_vlcs[tindex]->huffcodes[1]);
        }
        if (s->version == 1 && s->avctx->channels >= 2)
            align_put_bits(&s->pb);
    }
    return 0;
}
350
/**
 * Encode one frame into buf with the given total_gain.
 *
 * The bit writer s->pb is re-initialized on every call, so each call starts
 * writing at the beginning of buf.
 *
 * @param s          encoder context
 * @param src_coefs  MDCT coefficients, one row per channel
 * @param buf        output buffer
 * @param buf_size   size of buf in bytes
 * @param total_gain gain step passed on to encode_block()
 * @return the number of bytes written minus avctx->block_align (positive
 *         when the frame does not fit), or INT_MAX if encode_block() failed
 */
static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                        uint8_t *buf, int buf_size, int total_gain)
{
    int coded_bytes;

    init_put_bits(&s->pb, buf, buf_size);

    if (s->use_bit_reservoir) {
        av_assert0(0); // FIXME not implemented
    } else {
        const int status = encode_block(s, src_coefs, total_gain);

        if (status < 0)
            return INT_MAX;
    }

    align_put_bits(&s->pb);

    coded_bytes = put_bits_count(&s->pb) / 8;
    return coded_bytes - s->avctx->block_align;
}
365
/**
 * Encode one audio frame into a packet of exactly avctx->block_align bytes.
 *
 * The frame is windowed and transformed, optionally converted to mid/side,
 * and then encoded repeatedly while searching for the smallest total_gain
 * whose output still fits into block_align bytes. The remainder of the
 * packet is padded with 'N' bytes.
 *
 * @param avctx          codec context
 * @param avpkt          output packet
 * @param frame          input frame
 * @param got_packet_ptr set to 1 when a packet has been produced
 * @return 0 on success, a negative error code on failure
 */
static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
{
    WMACodecContext *s = avctx->priv_data;
    int i, step, total_gain, ret, error;

    /* required by non variable block len */
    s->block_len_bits = s->frame_len_bits;
    s->block_len      = 1 << s->block_len_bits;

    if ((ret = apply_window_and_mdct(avctx, frame)) < 0)
        return ret;

    if (s->ms_stereo) {
        int k;

        /* replace the two channels by their half-sum and half-difference */
        for (k = 0; k < s->block_len; k++) {
            const float half0 = s->coefs[0][k] * 0.5;
            const float half1 = s->coefs[1][k] * 0.5;

            s->coefs[0][k] = half0 + half1;
            s->coefs[1][k] = half0 - half1;
        }
    }

    ret = ff_alloc_packet2(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE, 0);
    if (ret < 0)
        return ret;

    /* binary search: keep every reduction of total_gain that still fits */
    total_gain = 128;
    for (step = 64; step; step >>= 1) {
        const int trial_gain = total_gain - step;

        error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
                             trial_gain);
        if (error <= 0)
            total_gain = trial_gain;
    }

    /* if the last trial did not fit, re-encode with increasing gain so that
     * s->pb ends up holding a frame that fits */
    while (error > 0 && total_gain <= 128) {
        error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
                             total_gain);
        total_gain++;
    }

    if (error > 0) {
        av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n");
        avpkt->size = 0;
        return AVERROR(EINVAL);
    }

    /* pad the packet up to block_align bytes */
    av_assert0((put_bits_count(&s->pb) & 7) == 0);
    i = avctx->block_align - (put_bits_count(&s->pb) + 7) / 8;
    av_assert0(i>=0);
    for (; i > 0; i--)
        put_bits(&s->pb, 8, 'N');

    flush_put_bits(&s->pb);
    av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align);

    if (frame->pts != AV_NOPTS_VALUE)
        avpkt->pts = frame->pts -
                     ff_samples_to_time_base(avctx, avctx->initial_padding);

    avpkt->size     = avctx->block_align;
    *got_packet_ptr = 1;
    return 0;
}
426
#if CONFIG_WMAV1_ENCODER
/* Encoder definition for Windows Media Audio 1 (planar float input). */
AVCodec ff_wmav1_encoder = {
    .name           = "wmav1",
    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_WMAV1,
    .priv_data_size = sizeof(WMACodecContext),
    .init           = encode_init,
    .encode2        = encode_superframe,
    .close          = ff_wma_end,
    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
    /* encode_init() returns early on failure without releasing what it has
     * already set up, so request that .close is run in that case too. */
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
};
#endif
#if CONFIG_WMAV2_ENCODER
/* Encoder definition for Windows Media Audio 2 (planar float input). */
AVCodec ff_wmav2_encoder = {
    .name           = "wmav2",
    .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_WMAV2,
    .priv_data_size = sizeof(WMACodecContext),
    .init           = encode_init,
    .encode2        = encode_superframe,
    .close          = ff_wma_end,
    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
    /* encode_init() returns early on failure without releasing what it has
     * already set up, so request that .close is run in that case too. */
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
};
#endif
455