• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Nellymoser encoder
3  * This code is developed as part of Google Summer of Code 2008 Program.
4  *
5  * Copyright (c) 2008 Bartlomiej Wolowiec
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file
26  * Nellymoser encoder
27  * by Bartlomiej Wolowiec
28  *
29  * Generic codec information: libavcodec/nellymoserdec.c
30  *
31  * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32  *                             (Copyright Joseph Artsimovich and UAB "DKD")
33  *
34  * for more information about nellymoser format, visit:
35  * http://wiki.multimedia.cx/index.php?title=Nellymoser
36  */
37 
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
41 #include "libavutil/thread.h"
42 
43 #include "audio_frame_queue.h"
44 #include "avcodec.h"
45 #include "fft.h"
46 #include "internal.h"
47 #include "nellymoser.h"
48 #include "sinewin.h"
49 
50 #define BITSTREAM_WRITER_LE
51 #include "put_bits.h"
52 
53 #define POW_TABLE_SIZE (1<<11)
54 #define POW_TABLE_OFFSET 3
55 #define OPT_SIZE ((1<<15) + 3000)
56 
57 typedef struct NellyMoserEncodeContext {
58     AVCodecContext  *avctx;
59     int             last_frame;
60     AVFloatDSPContext *fdsp;
61     FFTContext      mdct_ctx;
62     AudioFrameQueue afq;
63     DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
64     DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
65     DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
66     float           (*opt )[OPT_SIZE];
67     uint8_t         (*path)[OPT_SIZE];
68 } NellyMoserEncodeContext;
69 
70 static float pow_table[POW_TABLE_SIZE];     ///< pow(2, -i / 2048.0 - 3.0);
71 
72 static const uint8_t sf_lut[96] = {
73      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
74      5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
75     15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
76     27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
77     41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
78     54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
79 };
80 
81 static const uint8_t sf_delta_lut[78] = {
82      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
83      4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
84     13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
85     23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
86     28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
87 };
88 
89 static const uint8_t quant_lut[230] = {
90      0,
91 
92      0,  1,  2,
93 
94      0,  1,  2,  3,  4,  5,  6,
95 
96      0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
97     12, 13, 13, 13, 14,
98 
99      0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
100      8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
101     22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
102     30,
103 
104      0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
105      4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
106     10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
107     15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
108     21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
109     33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
110     46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
111     53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
112     58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
113     61, 61, 61, 61, 62,
114 };
115 
116 static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
117 static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
118 static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
119 
apply_mdct(NellyMoserEncodeContext * s)120 static void apply_mdct(NellyMoserEncodeContext *s)
121 {
122     float *in0 = s->buf;
123     float *in1 = s->buf + NELLY_BUF_LEN;
124     float *in2 = s->buf + 2 * NELLY_BUF_LEN;
125 
126     s->fdsp->vector_fmul        (s->in_buff,                 in0, ff_sine_128, NELLY_BUF_LEN);
127     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in1, ff_sine_128, NELLY_BUF_LEN);
128     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);
129 
130     s->fdsp->vector_fmul        (s->in_buff,                 in1, ff_sine_128, NELLY_BUF_LEN);
131     s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, in2, ff_sine_128, NELLY_BUF_LEN);
132     s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->in_buff);
133 }
134 
encode_end(AVCodecContext * avctx)135 static av_cold int encode_end(AVCodecContext *avctx)
136 {
137     NellyMoserEncodeContext *s = avctx->priv_data;
138 
139     ff_mdct_end(&s->mdct_ctx);
140 
141     av_freep(&s->opt);
142     av_freep(&s->path);
143     ff_af_queue_close(&s->afq);
144     av_freep(&s->fdsp);
145 
146     return 0;
147 }
148 
nellymoser_init_static(void)149 static av_cold void nellymoser_init_static(void)
150 {
151     /* faster way of doing
152     for (int i = 0; i < POW_TABLE_SIZE; i++)
153        pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
154     pow_table[0] = 1;
155     pow_table[1024] = M_SQRT1_2;
156     for (int i = 1; i < 513; i++) {
157         double tmp = exp2(-i / 2048.0);
158         pow_table[i] = tmp;
159         pow_table[1024-i] = M_SQRT1_2 / tmp;
160         pow_table[1024+i] = tmp * M_SQRT1_2;
161         pow_table[2048-i] = 0.5 / tmp;
162     }
163     /* Generate overlap window */
164     ff_init_ff_sine_windows(7);
165 }
166 
encode_init(AVCodecContext * avctx)167 static av_cold int encode_init(AVCodecContext *avctx)
168 {
169     static AVOnce init_static_once = AV_ONCE_INIT;
170     NellyMoserEncodeContext *s = avctx->priv_data;
171     int ret;
172 
173     if (avctx->channels != 1) {
174         av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
175         return AVERROR(EINVAL);
176     }
177 
178     if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
179         avctx->sample_rate != 11025 &&
180         avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
181         avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
182         av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
183         return AVERROR(EINVAL);
184     }
185 
186     avctx->frame_size = NELLY_SAMPLES;
187     avctx->initial_padding = NELLY_BUF_LEN;
188     ff_af_queue_init(avctx, &s->afq);
189     s->avctx = avctx;
190     if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
191         return ret;
192     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
193     if (!s->fdsp)
194         return AVERROR(ENOMEM);
195 
196     if (s->avctx->trellis) {
197         s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
198         s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
199         if (!s->opt || !s->path)
200             return AVERROR(ENOMEM);
201     }
202 
203     ff_thread_once(&init_static_once, nellymoser_init_static);
204 
205     return 0;
206 }
207 
208 #define find_best(val, table, LUT, LUT_add, LUT_size) \
209     best_idx = \
210         LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
211     if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
212         best_idx++;
213 
get_exponent_greedy(NellyMoserEncodeContext * s,float * cand,int * idx_table)214 static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
215 {
216     int band, best_idx, power_idx = 0;
217     float power_candidate;
218 
219     //base exponent
220     find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
221     idx_table[0] = best_idx;
222     power_idx = ff_nelly_init_table[best_idx];
223 
224     for (band = 1; band < NELLY_BANDS; band++) {
225         power_candidate = cand[band] - power_idx;
226         find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78);
227         idx_table[band] = best_idx;
228         power_idx += ff_nelly_delta_table[best_idx];
229     }
230 }
231 
distance(float x,float y,int band)232 static inline float distance(float x, float y, int band)
233 {
234     //return pow(fabs(x-y), 2.0);
235     float tmp = x - y;
236     return tmp * tmp;
237 }
238 
get_exponent_dynamic(NellyMoserEncodeContext * s,float * cand,int * idx_table)239 static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
240 {
241     int i, j, band, best_idx;
242     float power_candidate, best_val;
243 
244     float  (*opt )[OPT_SIZE] = s->opt ;
245     uint8_t(*path)[OPT_SIZE] = s->path;
246 
247     for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
248         opt[0][i] = INFINITY;
249     }
250 
251     for (i = 0; i < 64; i++) {
252         opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
253         path[0][ff_nelly_init_table[i]] = i;
254     }
255 
256     for (band = 1; band < NELLY_BANDS; band++) {
257         int q, c = 0;
258         float tmp;
259         int idx_min, idx_max, idx;
260         power_candidate = cand[band];
261         for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
262             idx_min = FFMAX(0, cand[band] - q);
263             idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
264             for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
265                 if ( isinf(opt[band - 1][i]) )
266                     continue;
267                 for (j = 0; j < 32; j++) {
268                     idx = i + ff_nelly_delta_table[j];
269                     if (idx > idx_max)
270                         break;
271                     if (idx >= idx_min) {
272                         tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
273                         if (opt[band][idx] > tmp) {
274                             opt[band][idx] = tmp;
275                             path[band][idx] = j;
276                             c = 1;
277                         }
278                     }
279                 }
280             }
281         }
282         av_assert1(c); //FIXME
283     }
284 
285     best_val = INFINITY;
286     best_idx = -1;
287     band = NELLY_BANDS - 1;
288     for (i = 0; i < OPT_SIZE; i++) {
289         if (best_val > opt[band][i]) {
290             best_val = opt[band][i];
291             best_idx = i;
292         }
293     }
294     for (band = NELLY_BANDS - 1; band >= 0; band--) {
295         idx_table[band] = path[band][best_idx];
296         if (band) {
297             best_idx -= ff_nelly_delta_table[path[band][best_idx]];
298         }
299     }
300 }
301 
302 /**
303  * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
304  *  @param s               encoder context
305  *  @param output          output buffer
306  *  @param output_size     size of output buffer
307  */
encode_block(NellyMoserEncodeContext * s,unsigned char * output,int output_size)308 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
309 {
310     PutBitContext pb;
311     int i, j, band, block, best_idx, power_idx = 0;
312     float power_val, coeff, coeff_sum;
313     float pows[NELLY_FILL_LEN];
314     int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
315     float cand[NELLY_BANDS];
316 
317     apply_mdct(s);
318 
319     init_put_bits(&pb, output, output_size);
320 
321     i = 0;
322     for (band = 0; band < NELLY_BANDS; band++) {
323         coeff_sum = 0;
324         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
325             coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
326                        + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
327         }
328         cand[band] =
329             log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
330     }
331 
332     if (s->avctx->trellis) {
333         get_exponent_dynamic(s, cand, idx_table);
334     } else {
335         get_exponent_greedy(s, cand, idx_table);
336     }
337 
338     i = 0;
339     for (band = 0; band < NELLY_BANDS; band++) {
340         if (band) {
341             power_idx += ff_nelly_delta_table[idx_table[band]];
342             put_bits(&pb, 5, idx_table[band]);
343         } else {
344             power_idx = ff_nelly_init_table[idx_table[0]];
345             put_bits(&pb, 6, idx_table[0]);
346         }
347         power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
348         for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
349             s->mdct_out[i] *= power_val;
350             s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
351             pows[i] = power_idx;
352         }
353     }
354 
355     ff_nelly_get_sample_bits(pows, bits);
356 
357     for (block = 0; block < 2; block++) {
358         for (i = 0; i < NELLY_FILL_LEN; i++) {
359             if (bits[i] > 0) {
360                 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
361                 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
362                 best_idx =
363                     quant_lut[av_clip (
364                             coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
365                             quant_lut_offset[bits[i]],
366                             quant_lut_offset[bits[i]+1] - 1
367                             )];
368                 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
369                     best_idx++;
370 
371                 put_bits(&pb, bits[i], best_idx);
372             }
373         }
374         if (!block)
375             put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
376     }
377 
378     flush_put_bits(&pb);
379     memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
380 }
381 
encode_frame(AVCodecContext * avctx,AVPacket * avpkt,const AVFrame * frame,int * got_packet_ptr)382 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
383                         const AVFrame *frame, int *got_packet_ptr)
384 {
385     NellyMoserEncodeContext *s = avctx->priv_data;
386     int ret;
387 
388     if (s->last_frame)
389         return 0;
390 
391     memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
392     if (frame) {
393         memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
394                frame->nb_samples * sizeof(*s->buf));
395         if (frame->nb_samples < NELLY_SAMPLES) {
396             memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
397                    (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
398             if (frame->nb_samples >= NELLY_BUF_LEN)
399                 s->last_frame = 1;
400         }
401         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
402             return ret;
403     } else {
404         memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
405         s->last_frame = 1;
406     }
407 
408     if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0)) < 0)
409         return ret;
410     encode_block(s, avpkt->data, avpkt->size);
411 
412     /* Get the next frame pts/duration */
413     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
414                        &avpkt->duration);
415 
416     *got_packet_ptr = 1;
417     return 0;
418 }
419 
420 AVCodec ff_nellymoser_encoder = {
421     .name           = "nellymoser",
422     .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
423     .type           = AVMEDIA_TYPE_AUDIO,
424     .id             = AV_CODEC_ID_NELLYMOSER,
425     .priv_data_size = sizeof(NellyMoserEncodeContext),
426     .init           = encode_init,
427     .encode2        = encode_frame,
428     .close          = encode_end,
429     .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
430     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
431                                                      AV_SAMPLE_FMT_NONE },
432     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
433 };
434