1 /*
2 * WMA compatible encoder
3 * Copyright (c) 2007 Michael Niedermayer
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include "config_components.h"
23
24 #include "libavutil/attributes.h"
25 #include "libavutil/ffmath.h"
26
27 #include "avcodec.h"
28 #include "codec_internal.h"
29 #include "encode.h"
30 #include "internal.h"
31 #include "wma.h"
32 #include "libavutil/avassert.h"
33
34
encode_init(AVCodecContext * avctx)35 static av_cold int encode_init(AVCodecContext *avctx)
36 {
37 WMACodecContext *s = avctx->priv_data;
38 int i, flags1, flags2, block_align;
39 uint8_t *extradata;
40 int ret;
41
42 s->avctx = avctx;
43
44 if (avctx->ch_layout.nb_channels > MAX_CHANNELS) {
45 av_log(avctx, AV_LOG_ERROR,
46 "too many channels: got %i, need %i or fewer\n",
47 avctx->ch_layout.nb_channels, MAX_CHANNELS);
48 return AVERROR(EINVAL);
49 }
50
51 if (avctx->sample_rate > 48000) {
52 av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n",
53 avctx->sample_rate);
54 return AVERROR(EINVAL);
55 }
56
57 if (avctx->bit_rate < 24 * 1000) {
58 av_log(avctx, AV_LOG_ERROR,
59 "bitrate too low: got %"PRId64", need 24000 or higher\n",
60 avctx->bit_rate);
61 return AVERROR(EINVAL);
62 }
63
64 /* extract flag info */
65 flags1 = 0;
66 flags2 = 1;
67 if (avctx->codec->id == AV_CODEC_ID_WMAV1) {
68 extradata = av_malloc(4);
69 if (!extradata)
70 return AVERROR(ENOMEM);
71 avctx->extradata_size = 4;
72 AV_WL16(extradata, flags1);
73 AV_WL16(extradata + 2, flags2);
74 } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) {
75 extradata = av_mallocz(10);
76 if (!extradata)
77 return AVERROR(ENOMEM);
78 avctx->extradata_size = 10;
79 AV_WL32(extradata, flags1);
80 AV_WL16(extradata + 4, flags2);
81 } else {
82 av_assert0(0);
83 }
84 avctx->extradata = extradata;
85 s->use_exp_vlc = flags2 & 0x0001;
86 s->use_bit_reservoir = flags2 & 0x0002;
87 s->use_variable_block_len = flags2 & 0x0004;
88 if (avctx->ch_layout.nb_channels == 2)
89 s->ms_stereo = 1;
90
91 if ((ret = ff_wma_init(avctx, flags2)) < 0)
92 return ret;
93
94 /* init MDCT */
95 for (i = 0; i < s->nb_block_sizes; i++) {
96 ret = ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0);
97 if (ret < 0)
98 return ret;
99 }
100
101 block_align = avctx->bit_rate * (int64_t) s->frame_len /
102 (avctx->sample_rate * 8);
103 block_align = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE);
104 avctx->block_align = block_align;
105 avctx->frame_size = avctx->initial_padding = s->frame_len;
106
107 return 0;
108 }
109
/**
 * Window the input samples of every channel and run the forward MDCT.
 *
 * For each channel the values kept in s->frame_out[ch] by the previous call
 * are copied to the start of s->output, the new input is scaled into
 * s->frame_out[ch], its vector_fmul_reverse() product with the window is
 * written behind the copied values, and s->frame_out[ch] is replaced by its
 * vector_fmul() product with the window so the next call can pick it up.
 * The MDCT of s->output is stored in s->coefs[ch].
 *
 * @param avctx codec context; avctx->priv_data is used as WMACodecContext
 * @param frame input frame; extended_data is read as one float plane per
 *              channel, nb_samples values each
 * @return 0 on success, AVERROR(EINVAL) if the first MDCT coefficient of a
 *         channel is not finite
 */
static int apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame)
{
    WMACodecContext *s      = avctx->priv_data;
    const int nb_samples    = frame->nb_samples;
    const int bsize         = s->frame_len_bits - s->block_len_bits;
    const int block_samples = 1 << s->block_len_bits;
    /* scale factor applied to the raw input before windowing */
    const float scale       = 2.0 * 32768.0 / block_samples;
    const float *window     = s->windows[bsize];
    FFTContext *mdct        = &s->mdct_ctx[bsize];
    float **planes          = (float **) frame->extended_data;
    int ch;

    for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
        float *carry = s->frame_out[ch];

        /* first part of the MDCT input: data saved by the previous call */
        memcpy(s->output, carry, block_samples * sizeof(*s->output));

        /* scaled copy of the new samples, then both windowed products */
        s->fdsp->vector_fmul_scalar(carry, planes[ch], scale, nb_samples);
        s->fdsp->vector_fmul_reverse(&s->output[block_samples], carry,
                                     window, nb_samples);
        s->fdsp->vector_fmul(carry, carry, window, nb_samples);

        mdct->mdct_calc(mdct, s->coefs[ch], s->output);

        /* only the first coefficient is inspected to detect bad input */
        if (!isfinite(s->coefs[ch][0])) {
            av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n");
            return AVERROR(EINVAL);
        }
    }

    return 0;
}
137
138 // FIXME use for decoding too
/**
 * Expand per-band exponent parameters into per-coefficient scale factors.
 *
 * One value is consumed from exp_param for each entry of the exponent band
 * table selected by the current block size; it is converted with
 * ff_exp10(value / 16) and replicated over that band's coefficients in
 * s->exponents[ch]. The largest scale factor seen is stored in
 * s->max_exponent[ch].
 *
 * @param s         encoder context
 * @param ch        channel whose exponent array is filled
 * @param exp_param exponent parameters, one per band
 */
static void init_exp(WMACodecContext *s, int ch, const int *exp_param)
{
    const uint16_t *band_len =
        s->exponent_bands[s->frame_len_bits - s->block_len_bits];
    float *dst       = s->exponents[ch];
    float *const end = dst + s->block_len;
    float peak       = 0;

    while (dst < end) {
        /* XXX: use a table */
        float scale = ff_exp10(*exp_param++ *(1.0 / 16.0));
        int left    = *band_len++;

        peak = FFMAX(peak, scale);

        /* every band is expected to hold at least one coefficient */
        do {
            *dst++ = scale;
        } while (--left);
    }

    s->max_exponent[ch] = peak;
}
160
/**
 * Write the exponents of one channel to the bitstream.
 *
 * For version 1 streams the first exponent is written as a raw 5-bit value
 * (exponent - 10). Every following band is written as the difference to the
 * previous exponent, offset by 60, using the ff_aac_scalefactor_bits /
 * ff_aac_scalefactor_code tables. For other versions the running predictor
 * starts at 36 and all bands are difference-coded.
 *
 * @param s         encoder context; bits are appended to s->pb
 * @param ch        channel index (the emitted bits do not depend on it)
 * @param exp_param exponent parameters, one per band
 */
static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param)
{
    const uint16_t *band_len =
        s->exponent_bands[s->frame_len_bits - s->block_len_bits];
    int last_exp = 36;
    int pos      = 0;   /* number of coefficients covered so far */

    if (s->version == 1) {
        last_exp = *exp_param++;
        av_assert0(last_exp - 10 >= 0 && last_exp - 10 < 32);
        put_bits(&s->pb, 5, last_exp - 10);
        pos += *band_len++;
    }

    /* walk the remaining bands until the whole block is covered */
    for (; pos < s->block_len; pos += *band_len++) {
        int exp  = *exp_param++;
        int code = exp - last_exp + 60;

        av_assert1(code >= 0 && code < 120);
        put_bits(&s->pb, ff_aac_scalefactor_bits[code],
                 ff_aac_scalefactor_code[code]);
        last_exp = exp;
    }
}
188
/**
 * Quantise and entropy-code one block for all channels into s->pb.
 *
 * Only fixed block length, VLC-coded exponents and a constant exponent set
 * (fixed_exp) are implemented; the other paths assert. The function
 *  - sets the block size fields from s->frame_len_bits,
 *  - quantises src_coefs with a step derived from total_gain, the exponents
 *    and the MDCT normalisation into s->coefs1,
 *  - writes the stereo flag, the per-channel coded flags, total_gain, the
 *    optional high-band flags, the exponents and the run/level coded
 *    coefficients.
 *
 * @param s          encoder context
 * @param src_coefs  MDCT coefficients, one row per channel
 * @param total_gain quantiser gain to try
 * @return 0 on success, 1 if no channel is coded, -1 if a quantised
 *         coefficient is NaN or outside [-32768, 32767], or if an
 *         escape-coded level does not fit in the bit width chosen for
 *         total_gain
 */
static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                        int total_gain)
{
    int channels = s->avctx->ch_layout.nb_channels;
    int v, bsize, ch, coef_nb_bits, parse_exponents;
    float mdct_norm;
    int nb_coefs[MAX_CHANNELS];
    /* constant exponent parameter handed to init_exp()/encode_exp_vlc() for
     * every band (assumes the band tables never have more than 25 entries
     * -- TODO confirm against the tables set up by ff_wma_init()) */
    static const int fixed_exp[25] = {
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20
    };

    // FIXME remove duplication relative to decoder
    if (s->use_variable_block_len) {
        av_assert0(0); // FIXME not implemented
    } else {
        /* fixed block len */
        s->next_block_len_bits = s->frame_len_bits;
        s->prev_block_len_bits = s->frame_len_bits;
        s->block_len_bits      = s->frame_len_bits;
    }

    s->block_len = 1 << s->block_len_bits;
    // av_assert0((s->block_pos + s->block_len) <= s->frame_len);
    bsize = s->frame_len_bits - s->block_len_bits;

    // FIXME factor
    /* every channel codes the same coefficient range */
    v = s->coefs_end[bsize] - s->coefs_start;
    for (ch = 0; ch < channels; ch++)
        nb_coefs[ch] = v;
    {
        int n4 = s->block_len / 2;
        mdct_norm = 1.0 / (float) n4;
        if (s->version == 1)
            mdct_norm *= sqrt(n4);
    }

    if (channels == 2)
        put_bits(&s->pb, 1, !!s->ms_stereo);

    for (ch = 0; ch < channels; ch++) {
        // FIXME only set channel_coded when needed, instead of always
        s->channel_coded[ch] = 1;
        if (s->channel_coded[ch])
            init_exp(s, ch, fixed_exp);
    }

    /* quantise the coefficients of every coded channel */
    for (ch = 0; ch < channels; ch++) {
        if (s->channel_coded[ch]) {
            WMACoef *coefs1;
            float *coefs, *exponents, mult;
            int i, n;

            coefs1    = s->coefs1[ch];
            exponents = s->exponents[ch];
            mult      = ff_exp10(total_gain * 0.05) / s->max_exponent[ch];
            mult     *= mdct_norm;
            coefs     = src_coefs[ch];
            if (s->use_noise_coding && 0) {
                av_assert0(0); // FIXME not implemented
            } else {
                coefs += s->coefs_start;
                n      = nb_coefs[ch];
                for (i = 0; i < n; i++) {
                    double t = *coefs++ / (exponents[i] * mult);
                    /* Written as a negated in-range test so that NaN, for
                     * which every comparison is false, is rejected too
                     * instead of reaching lrint() with an unspecified
                     * result. */
                    if (!(t >= -32768 && t <= 32767))
                        return -1;

                    coefs1[i] = lrint(t);
                }
            }
        }
    }

    /* one "channel coded" flag per channel */
    v = 0;
    for (ch = 0; ch < channels; ch++) {
        int a = s->channel_coded[ch];
        put_bits(&s->pb, 1, a);
        v |= a;
    }

    if (!v)
        return 1;

    /* total_gain - 1 is sent as a run of 7-bit groups: a group of 127 means
     * "add 127 and continue", the final group holds the remainder */
    for (v = total_gain - 1; v >= 127; v -= 127)
        put_bits(&s->pb, 7, 127);
    put_bits(&s->pb, 7, v);

    coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);

    if (s->use_noise_coding) {
        for (ch = 0; ch < channels; ch++) {
            if (s->channel_coded[ch]) {
                int i, n;
                n = s->exponent_high_sizes[bsize];
                for (i = 0; i < n; i++) {
                    /* no high band is ever marked as noise coded */
                    put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
                    if (0)
                        nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
                }
            }
        }
    }

    parse_exponents = 1;
    if (s->block_len_bits != s->frame_len_bits)
        put_bits(&s->pb, 1, parse_exponents);

    if (parse_exponents) {
        for (ch = 0; ch < channels; ch++) {
            if (s->channel_coded[ch]) {
                if (s->use_exp_vlc) {
                    encode_exp_vlc(s, ch, fixed_exp);
                } else {
                    av_assert0(0); // FIXME not implemented
                    // encode_exp_lsp(s, ch);
                }
            }
        }
    } else
        av_assert0(0); // FIXME not implemented

    /* run/level coding of the quantised coefficients */
    for (ch = 0; ch < channels; ch++) {
        if (s->channel_coded[ch]) {
            int run, tindex;
            WMACoef *ptr, *eptr;
            /* the second channel uses table 1 when ms_stereo is set */
            tindex = (ch == 1 && s->ms_stereo);
            ptr    = &s->coefs1[ch][0];
            eptr   = ptr + nb_coefs[ch];

            run = 0;
            for (; ptr < eptr; ptr++) {
                if (*ptr) {
                    int level     = *ptr;
                    int abs_level = FFABS(level);
                    int code      = 0;
                    /* use a direct (run, level) code when the table has
                     * one, otherwise fall back to code 0 */
                    if (abs_level <= s->coef_vlcs[tindex]->max_level)
                        if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
                            code = run + s->int_table[tindex][abs_level - 1];

                    av_assert2(code < s->coef_vlcs[tindex]->n);
                    put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
                             s->coef_vlcs[tindex]->huffcodes[code]);

                    if (code == 0) {
                        /* code 0 is followed by the raw level and run; the
                         * level must fit in coef_nb_bits */
                        if (1 << coef_nb_bits <= abs_level)
                            return -1;

                        put_bits(&s->pb, coef_nb_bits, abs_level);
                        put_bits(&s->pb, s->frame_len_bits, run);
                    }
                    // FIXME the sign is flipped somewhere
                    put_bits(&s->pb, 1, level < 0);
                    run = 0;
                } else
                    run++;
            }
            /* trailing zeros: emit code 1 (presumably the end-of-block
             * symbol -- confirm against the decoder) */
            if (run)
                put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
                         s->coef_vlcs[tindex]->huffcodes[1]);
        }
        if (s->version == 1 && channels >= 2)
            align_put_bits(&s->pb);
    }
    return 0;
}
358
/**
 * Encode one frame into buf with the given gain and report how well it fits.
 *
 * The bit writer s->pb is re-initialised on buf, a single block is encoded
 * and the output is byte aligned.
 *
 * @param s          encoder context
 * @param src_coefs  MDCT coefficients, one row per channel
 * @param buf        output buffer
 * @param buf_size   size of buf in bytes
 * @param total_gain quantiser gain to try
 * @return number of bytes written minus avctx->block_align (so a value <= 0
 *         means the frame fits), or INT_MAX if encode_block() failed
 */
static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                        uint8_t *buf, int buf_size, int total_gain)
{
    int coded_bytes;

    init_put_bits(&s->pb, buf, buf_size);

    if (s->use_bit_reservoir) {
        av_assert0(0); // FIXME not implemented
    } else {
        int block_ret = encode_block(s, src_coefs, total_gain);

        if (block_ret < 0)
            return INT_MAX;
    }

    align_put_bits(&s->pb);

    coded_bytes = put_bits_count(&s->pb) / 8;
    return coded_bytes - s->avctx->block_align;
}
373
/**
 * Encode one input frame into a packet of exactly avctx->block_align bytes.
 *
 * The frame is windowed and transformed, the two channels are converted to
 * half-sum / half-difference when ms_stereo is set, and the gain is then
 * searched: starting from 128, the steps 64, 32, ..., 1 are subtracted
 * whenever encode_frame() reports that the result still fits. If the last
 * attempt did not fit, the frame is re-encoded with increasing gain until it
 * does or the gain has passed 128. The remainder of the block is filled with
 * 'N' bytes.
 *
 * @param avctx          codec context
 * @param avpkt          output packet, allocated here
 * @param frame          input frame
 * @param got_packet_ptr set to 1 when a packet was produced
 * @return 0 on success, a negative error code from the transform or packet
 *         allocation, or AVERROR(EINVAL) if no gain makes the frame fit
 */
static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
{
    WMACodecContext *s = avctx->priv_data;
    int i, step, total_gain, ret, error;

    s->block_len_bits = s->frame_len_bits; // required by non variable block len
    s->block_len      = 1 << s->block_len_bits;

    ret = apply_window_and_mdct(avctx, frame);
    if (ret < 0)
        return ret;

    if (s->ms_stereo) {
        float *c0 = s->coefs[0];
        float *c1 = s->coefs[1];

        for (i = 0; i < s->block_len; i++) {
            const float a = c0[i] * 0.5;
            const float b = c1[i] * 0.5;

            c0[i] = a + b;
            c1[i] = a - b;
        }
    }

    ret = ff_alloc_packet(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE);
    if (ret < 0)
        return ret;

    /* binary search for the smallest gain whose output still fits */
    total_gain = 128;
    for (step = 64; step; step >>= 1) {
        error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
                             total_gain - step);
        if (error <= 0)
            total_gain -= step;
    }

    /* the buffer holds the last attempt; redo it if that one did not fit */
    while (total_gain <= 128 && error > 0)
        error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
                             total_gain++);

    if (error > 0) {
        av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n");
        avpkt->size = 0;
        return AVERROR(EINVAL);
    }

    /* pad the coded data up to block_align bytes */
    av_assert0((put_bits_count(&s->pb) & 7) == 0);
    i = avctx->block_align - put_bytes_count(&s->pb, 0);
    av_assert0(i>=0);
    for (; i > 0; i--)
        put_bits(&s->pb, 8, 'N');

    flush_put_bits(&s->pb);
    av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align);

    /* shift the timestamp back by the encoder's initial padding */
    if (frame->pts != AV_NOPTS_VALUE)
        avpkt->pts = frame->pts -
                     ff_samples_to_time_base(avctx, avctx->initial_padding);

    avpkt->size     = avctx->block_align;
    *got_packet_ptr = 1;
    return 0;
}
434
#if CONFIG_WMAV1_ENCODER
/* Codec descriptor of the "wmav1" encoder: planar float input, private
 * context of type WMACodecContext, encode_init() / encode_superframe() as
 * callbacks and ff_wma_end() for cleanup. */
const FFCodec ff_wmav1_encoder = {
    /* public description */
    .p.name         = "wmav1",
    .p.long_name    = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_WMAV1,
    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
    /* private state and internal capabilities */
    .priv_data_size = sizeof(WMACodecContext),
    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
    /* callbacks */
    .init           = encode_init,
    FF_CODEC_ENCODE_CB(encode_superframe),
    .close          = ff_wma_end,
};
#endif
#if CONFIG_WMAV2_ENCODER
/* Codec descriptor of the "wmav2" encoder: planar float input, private
 * context of type WMACodecContext, encode_init() / encode_superframe() as
 * callbacks and ff_wma_end() for cleanup. */
const FFCodec ff_wmav2_encoder = {
    /* public description */
    .p.name         = "wmav2",
    .p.long_name    = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_WMAV2,
    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },
    /* private state and internal capabilities */
    .priv_data_size = sizeof(WMACodecContext),
    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
    /* callbacks */
    .init           = encode_init,
    FF_CODEC_ENCODE_CB(encode_superframe),
    .close          = ff_wma_end,
};
#endif
465