• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * WMA compatible encoder
3  * Copyright (c) 2007 Michael Niedermayer
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "config_components.h"
23 
24 #include "libavutil/attributes.h"
25 #include "libavutil/ffmath.h"
26 
27 #include "avcodec.h"
28 #include "codec_internal.h"
29 #include "encode.h"
30 #include "internal.h"
31 #include "wma.h"
32 #include "libavutil/avassert.h"
33 
34 
encode_init(AVCodecContext * avctx)35 static av_cold int encode_init(AVCodecContext *avctx)
36 {
37     WMACodecContext *s = avctx->priv_data;
38     int i, flags1, flags2, block_align;
39     uint8_t *extradata;
40     int ret;
41 
42     s->avctx = avctx;
43 
44     if (avctx->ch_layout.nb_channels > MAX_CHANNELS) {
45         av_log(avctx, AV_LOG_ERROR,
46                "too many channels: got %i, need %i or fewer\n",
47                avctx->ch_layout.nb_channels, MAX_CHANNELS);
48         return AVERROR(EINVAL);
49     }
50 
51     if (avctx->sample_rate > 48000) {
52         av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz\n",
53                avctx->sample_rate);
54         return AVERROR(EINVAL);
55     }
56 
57     if (avctx->bit_rate < 24 * 1000) {
58         av_log(avctx, AV_LOG_ERROR,
59                "bitrate too low: got %"PRId64", need 24000 or higher\n",
60                avctx->bit_rate);
61         return AVERROR(EINVAL);
62     }
63 
64     /* extract flag info */
65     flags1 = 0;
66     flags2 = 1;
67     if (avctx->codec->id == AV_CODEC_ID_WMAV1) {
68         extradata             = av_malloc(4);
69         if (!extradata)
70             return AVERROR(ENOMEM);
71         avctx->extradata_size = 4;
72         AV_WL16(extradata, flags1);
73         AV_WL16(extradata + 2, flags2);
74     } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) {
75         extradata             = av_mallocz(10);
76         if (!extradata)
77             return AVERROR(ENOMEM);
78         avctx->extradata_size = 10;
79         AV_WL32(extradata, flags1);
80         AV_WL16(extradata + 4, flags2);
81     } else {
82         av_assert0(0);
83     }
84     avctx->extradata          = extradata;
85     s->use_exp_vlc            = flags2 & 0x0001;
86     s->use_bit_reservoir      = flags2 & 0x0002;
87     s->use_variable_block_len = flags2 & 0x0004;
88     if (avctx->ch_layout.nb_channels == 2)
89         s->ms_stereo = 1;
90 
91     if ((ret = ff_wma_init(avctx, flags2)) < 0)
92         return ret;
93 
94     /* init MDCT */
95     for (i = 0; i < s->nb_block_sizes; i++) {
96         ret = ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0);
97         if (ret < 0)
98             return ret;
99     }
100 
101     block_align        = avctx->bit_rate * (int64_t) s->frame_len /
102                          (avctx->sample_rate * 8);
103     block_align        = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE);
104     avctx->block_align = block_align;
105     avctx->frame_size = avctx->initial_padding = s->frame_len;
106 
107     return 0;
108 }
109 
/**
 * Scale and window the samples of each channel, then run the forward MDCT.
 *
 * For every channel the previously stored windowed samples in
 * s->frame_out[ch] form the first half of s->output, the new samples
 * (scaled, then multiplied by the reversed window) form the second half, and
 * the MDCT of s->output is written to s->coefs[ch]. The new samples
 * multiplied by the forward window are left in s->frame_out[ch].
 *
 * @param avctx codec context whose priv_data is a WMACodecContext
 * @param frame input frame; extended_data is read as one float plane per
 *              channel
 * @return 0 on success, AVERROR(EINVAL) if the first MDCT coefficient of a
 *         channel is not finite
 */
static int apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame)
{
    WMACodecContext *s     = avctx->priv_data;
    float **planes         = (float **) frame->extended_data;
    const int nb_samples   = frame->nb_samples;
    const int bsize        = s->frame_len_bits - s->block_len_bits;
    const int block_size   = 1 << s->block_len_bits;
    const float *window    = s->windows[bsize];
    FFTContext *mdct       = &s->mdct_ctx[bsize];
    const float scale      = 2.0 * 32768.0 / block_size;
    const int nb_channels  = avctx->ch_layout.nb_channels;
    int ch = 0;

    while (ch < nb_channels) {
        /* first half of the MDCT input: data kept from the previous call */
        memcpy(s->output, s->frame_out[ch], block_size * sizeof(*s->output));

        /* scale the new samples in place of the old ones */
        s->fdsp->vector_fmul_scalar(s->frame_out[ch], planes[ch],
                                    scale, nb_samples);

        /* second half of the MDCT input: new samples times reversed window */
        s->fdsp->vector_fmul_reverse(&s->output[block_size],
                                     s->frame_out[ch], window, nb_samples);

        /* keep the forward-windowed samples for the next call */
        s->fdsp->vector_fmul(s->frame_out[ch], s->frame_out[ch],
                             window, nb_samples);

        mdct->mdct_calc(mdct, s->coefs[ch], s->output);

        if (!isfinite(s->coefs[ch][0])) {
            av_log(avctx, AV_LOG_ERROR, "Input contains NaN/+-Inf\n");
            return AVERROR(EINVAL);
        }
        ch++;
    }

    return 0;
}
137 
138 // FIXME use for decoding too
/**
 * Expand per-band exponent parameters into per-coefficient scale factors.
 *
 * Each value read from exp_param is converted to 10^(value / 16) and
 * replicated over the number of coefficients given by the matching entry of
 * the exponent band table for the current block size. The largest scale
 * factor is stored in s->max_exponent[ch].
 *
 * @param s         encoder context
 * @param ch        channel whose s->exponents[ch] row is filled
 * @param exp_param one exponent parameter per band
 */
static void init_exp(WMACodecContext *s, int ch, const int *exp_param)
{
    const uint16_t *band_len =
        s->exponent_bands[s->frame_len_bits - s->block_len_bits];
    float *dst           = s->exponents[ch];
    float *const dst_end = dst + s->block_len;
    float peak           = 0;

    while (dst < dst_end) {
        /* XXX: use a table */
        const float scale = ff_exp10(*exp_param++ *(1.0 / 16.0));
        int remaining     = *band_len++;

        peak = FFMAX(peak, scale);

        /* a band is expected to hold at least one coefficient */
        do {
            *dst++ = scale;
        } while (--remaining);
    }

    s->max_exponent[ch] = peak;
}
160 
/**
 * Write the band exponents of one channel to s->pb.
 *
 * For version 1 the first exponent is written directly as a 5-bit value
 * (exponent - 10); for other versions the predictor starts at 36. Every
 * following exponent is written as the difference to the previous one,
 * offset by 60, using the AAC scalefactor code table.
 *
 * @param s         encoder context
 * @param ch        channel index, used to locate s->exponents[ch]
 * @param exp_param one exponent parameter per band
 */
static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param)
{
    const uint16_t *band_len =
        s->exponent_bands[s->frame_len_bits - s->block_len_bits];
    /* pos/end are only used to count bands until the block is covered */
    float *pos       = s->exponents[ch];
    float *const end = pos + s->block_len;
    int prev_exp;

    if (s->version == 1) {
        prev_exp = *exp_param++;
        av_assert0(prev_exp - 10 >= 0 && prev_exp - 10 < 32);
        put_bits(&s->pb, 5, prev_exp - 10);
        pos += *band_len++;
    } else {
        prev_exp = 36;
    }

    for (; pos < end; pos += *band_len++) {
        const int cur_exp = *exp_param++;
        const int code    = cur_exp - prev_exp + 60;

        av_assert1(code >= 0 && code < 120);
        put_bits(&s->pb, ff_aac_scalefactor_bits[code],
                 ff_aac_scalefactor_code[code]);
        prev_exp = cur_exp;
    }
}
188 
/**
 * Quantize one block of MDCT coefficients and write it to s->pb.
 *
 * Only fixed block length, VLC-coded exponents and no noise coding are
 * implemented; the other paths assert. All channels are always marked as
 * coded and use the same fixed exponent table.
 *
 * @param s          encoder context; s->pb must already be initialized
 * @param src_coefs  MDCT coefficients, one row per channel
 * @param total_gain gain used to scale the coefficients before rounding
 * @return 0 on success, 1 if no channel is coded, -1 if a coefficient cannot
 *         be represented with the given total_gain
 */
static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                        int total_gain)
{
    int channels = s->avctx->ch_layout.nb_channels;
    int v, bsize, ch, coef_nb_bits, parse_exponents;
    float mdct_norm;
    int nb_coefs[MAX_CHANNELS];
    /* the same exponent parameter is used for every band */
    static const int fixed_exp[25] = {
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20
    };

    // FIXME remove duplication relative to decoder
    if (s->use_variable_block_len) {
        av_assert0(0); // FIXME not implemented
    } else {
        /* fixed block len */
        s->next_block_len_bits = s->frame_len_bits;
        s->prev_block_len_bits = s->frame_len_bits;
        s->block_len_bits      = s->frame_len_bits;
    }

    s->block_len = 1 << s->block_len_bits;
    bsize = s->frame_len_bits - s->block_len_bits;

    // FIXME factor
    v = s->coefs_end[bsize] - s->coefs_start;
    for (ch = 0; ch < channels; ch++)
        nb_coefs[ch] = v;
    {
        int n4 = s->block_len / 2;
        mdct_norm = 1.0 / (float) n4;
        if (s->version == 1)
            mdct_norm *= sqrt(n4);
    }

    if (channels == 2)
        put_bits(&s->pb, 1, !!s->ms_stereo);

    for (ch = 0; ch < channels; ch++) {
        // FIXME only set channel_coded when needed, instead of always
        s->channel_coded[ch] = 1;
        if (s->channel_coded[ch])
            init_exp(s, ch, fixed_exp);
    }

    /* quantize the coefficients of every coded channel into s->coefs1 */
    for (ch = 0; ch < channels; ch++) {
        if (s->channel_coded[ch]) {
            WMACoef *coefs1;
            float *coefs, *exponents, mult;
            int i, n;

            coefs1    = s->coefs1[ch];
            exponents = s->exponents[ch];
            mult      = ff_exp10(total_gain * 0.05) / s->max_exponent[ch];
            mult     *= mdct_norm;
            coefs     = src_coefs[ch];

            // FIXME noise coding is not implemented
            coefs += s->coefs_start;
            n      = nb_coefs[ch];
            for (i = 0; i < n; i++) {
                double t = *coefs++ / (exponents[i] * mult);

                /*
                 * Negated in-range test: NaN compares false against
                 * everything, so "t < lo || t > hi" would let it through to
                 * lrint(), whose result is unspecified for NaN.
                 */
                if (!(t >= -32768 && t <= 32767))
                    return -1;

                coefs1[i] = lrint(t);
            }
        }
    }

    /* one "channel coded" bit per channel */
    v = 0;
    for (ch = 0; ch < channels; ch++) {
        int a = s->channel_coded[ch];
        put_bits(&s->pb, 1, a);
        v |= a;
    }

    if (!v)
        return 1;

    /* total_gain - 1 is written as a run of 7-bit 127 values plus remainder */
    for (v = total_gain - 1; v >= 127; v -= 127)
        put_bits(&s->pb, 7, 127);
    put_bits(&s->pb, 7, v);

    coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);

    if (s->use_noise_coding) {
        for (ch = 0; ch < channels; ch++) {
            if (s->channel_coded[ch]) {
                int i, n;
                n = s->exponent_high_sizes[bsize];
                /* no high band is ever marked as coded, so nb_coefs[ch]
                 * is left unchanged */
                for (i = 0; i < n; i++)
                    put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
            }
        }
    }

    parse_exponents = 1;
    if (s->block_len_bits != s->frame_len_bits)
        put_bits(&s->pb, 1, parse_exponents);

    if (parse_exponents) {
        for (ch = 0; ch < channels; ch++) {
            if (s->channel_coded[ch]) {
                if (s->use_exp_vlc) {
                    encode_exp_vlc(s, ch, fixed_exp);
                } else {
                    av_assert0(0); // FIXME LSP exponents not implemented
                }
            }
        }
    } else
        av_assert0(0); // FIXME not implemented

    /* run/level coding of the quantized coefficients */
    for (ch = 0; ch < channels; ch++) {
        if (s->channel_coded[ch]) {
            int run, tindex;
            WMACoef *ptr, *eptr;
            /* the second channel uses table 1 when mid/side stereo is on */
            tindex = (ch == 1 && s->ms_stereo);
            ptr    = &s->coefs1[ch][0];
            eptr   = ptr + nb_coefs[ch];

            run = 0;
            for (; ptr < eptr; ptr++) {
                if (*ptr) {
                    int level     = *ptr;
                    int abs_level = FFABS(level);
                    int code      = 0;
                    if (abs_level <= s->coef_vlcs[tindex]->max_level)
                        if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
                            code = run + s->int_table[tindex][abs_level - 1];

                    av_assert2(code < s->coef_vlcs[tindex]->n);
                    put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
                             s->coef_vlcs[tindex]->huffcodes[code]);

                    /* code 0 is the escape: level and run follow verbatim */
                    if (code == 0) {
                        if (1 << coef_nb_bits <= abs_level)
                            return -1;

                        put_bits(&s->pb, coef_nb_bits, abs_level);
                        put_bits(&s->pb, s->frame_len_bits, run);
                    }
                    // FIXME the sign is flipped somewhere
                    put_bits(&s->pb, 1, level < 0);
                    run = 0;
                } else
                    run++;
            }
            /* trailing zeros: emit code 1 once */
            if (run)
                put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
                         s->coef_vlcs[tindex]->huffcodes[1]);
        }
        if (s->version == 1 && channels >= 2)
            align_put_bits(&s->pb);
    }
    return 0;
}
358 
/**
 * Encode one frame into buf with the given total gain.
 *
 * Re-initializes s->pb over buf, encodes a single block and byte-aligns the
 * output. The bit reservoir is not implemented and asserts.
 *
 * @param s          encoder context
 * @param src_coefs  MDCT coefficients, one row per channel
 * @param buf        output buffer
 * @param buf_size   size of buf in bytes
 * @param total_gain gain passed on to encode_block()
 * @return number of bytes written minus block_align (so a value <= 0 means
 *         the frame fits), or INT_MAX if encode_block() failed
 */
static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                        uint8_t *buf, int buf_size, int total_gain)
{
    int coded_bytes;

    init_put_bits(&s->pb, buf, buf_size);

    if (s->use_bit_reservoir) {
        av_assert0(0); // FIXME not implemented
    } else {
        if (encode_block(s, src_coefs, total_gain) < 0)
            return INT_MAX;
    }

    align_put_bits(&s->pb);
    coded_bytes = put_bits_count(&s->pb) / 8;

    return coded_bytes - s->avctx->block_align;
}
373 
/**
 * Encode one input frame into a packet of exactly block_align bytes.
 *
 * Windows and transforms the input, applies the mid/side transform when
 * s->ms_stereo is set, then searches for a total gain at which the encoded
 * frame does not exceed block_align. The remaining bytes of the packet are
 * filled with 'N'.
 *
 * @param avctx          codec context whose priv_data is a WMACodecContext
 * @param avpkt          output packet, allocated here
 * @param frame          input frame
 * @param got_packet_ptr set to 1 when a packet has been produced
 * @return 0 on success, a negative AVERROR code on failure
 */
static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
{
    WMACodecContext *s = avctx->priv_data;
    int step, gain, status, excess, padding;

    /* required by non variable block len */
    s->block_len_bits = s->frame_len_bits;
    s->block_len      = 1 << s->block_len_bits;

    status = apply_window_and_mdct(avctx, frame);
    if (status < 0)
        return status;

    if (s->ms_stereo) {
        int k;

        /* replace the two channels by half their sum and half their difference */
        for (k = 0; k < s->block_len; k++) {
            const float half0 = s->coefs[0][k] * 0.5;
            const float half1 = s->coefs[1][k] * 0.5;

            s->coefs[0][k] = half0 + half1;
            s->coefs[1][k] = half0 - half1;
        }
    }

    status = ff_alloc_packet(avctx, avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE);
    if (status < 0)
        return status;

    /* bisection over the gain: keep each decrement for which the frame fits */
    gain = 128;
    for (step = 64; step; step >>= 1) {
        excess = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
                              gain - step);
        if (excess <= 0)
            gain -= step;
    }

    /* the last trial may not have fit: re-encode, raising the gain up to 128 */
    while (gain <= 128 && excess > 0) {
        excess = encode_frame(s, s->coefs, avpkt->data, avpkt->size, gain);
        gain++;
    }

    if (excess > 0) {
        av_log(avctx, AV_LOG_ERROR, "Invalid input data or requested bitrate too low, cannot encode\n");
        avpkt->size = 0;
        return AVERROR(EINVAL);
    }

    /* pad the payload up to block_align bytes */
    av_assert0((put_bits_count(&s->pb) & 7) == 0);
    padding = avctx->block_align - put_bytes_count(&s->pb, 0);
    av_assert0(padding >= 0);
    for (; padding > 0; padding--)
        put_bits(&s->pb, 8, 'N');

    flush_put_bits(&s->pb);
    av_assert0(put_bits_ptr(&s->pb) - s->pb.buf == avctx->block_align);

    if (frame->pts != AV_NOPTS_VALUE)
        avpkt->pts = frame->pts -
                     ff_samples_to_time_base(avctx, avctx->initial_padding);

    avpkt->size     = avctx->block_align;
    *got_packet_ptr = 1;
    return 0;
}
434 
#if CONFIG_WMAV1_ENCODER
/* Codec entry for the "wmav1" encoder; only built when it is configured in. */
const FFCodec ff_wmav1_encoder = {
    /* fields of the embedded .p member */
    .p.name         = "wmav1",
    .p.long_name    = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_WMAV1,
    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },

    /* private context size and capability flags */
    .priv_data_size = sizeof(WMACodecContext),
    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,

    /* callbacks, shared with the wmav2 entry */
    .init           = encode_init,
    FF_CODEC_ENCODE_CB(encode_superframe),
    .close          = ff_wma_end,
};
#endif
#if CONFIG_WMAV2_ENCODER
/* Codec entry for the "wmav2" encoder; only built when it is configured in. */
const FFCodec ff_wmav2_encoder = {
    /* fields of the embedded .p member */
    .p.name         = "wmav2",
    .p.long_name    = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_WMAV2,
    .p.sample_fmts  = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                      AV_SAMPLE_FMT_NONE },

    /* private context size and capability flags */
    .priv_data_size = sizeof(WMACodecContext),
    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,

    /* callbacks, shared with the wmav1 entry */
    .init           = encode_init,
    FF_CODEC_ENCODE_CB(encode_superframe),
    .close          = ff_wma_end,
};
#endif
465