1 /*
2 * Nellymoser encoder
3 * This code is developed as part of Google Summer of Code 2008 Program.
4 *
5 * Copyright (c) 2008 Bartlomiej Wolowiec
6 *
7 * This file is part of FFmpeg.
8 *
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 /**
25 * @file
26 * Nellymoser encoder
27 * by Bartlomiej Wolowiec
28 *
29 * Generic codec information: libavcodec/nellymoserdec.c
30 *
31 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32 * (Copyright Joseph Artsimovich and UAB "DKD")
33 *
34 * for more information about nellymoser format, visit:
35 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36 */
37
38 #include "libavutil/common.h"
39 #include "libavutil/float_dsp.h"
40 #include "libavutil/mathematics.h"
41 #include "libavutil/thread.h"
42
43 #include "audio_frame_queue.h"
44 #include "avcodec.h"
45 #include "fft.h"
46 #include "internal.h"
47 #include "nellymoser.h"
48 #include "sinewin.h"
49
50 #define BITSTREAM_WRITER_LE
51 #include "put_bits.h"
52
/** Number of entries in pow_table (2048). */
#define POW_TABLE_SIZE   (1 << 11)

/**
 * Extra power-of-two exponent added to the shift that divides a pow_table
 * entry when a band gain is computed in encode_block().
 */
#define POW_TABLE_OFFSET 3

/** Row length of the trellis tables s->opt and s->path. */
#define OPT_SIZE         ((1 << 15) + 3000)
56
57 typedef struct NellyMoserEncodeContext {
58 AVCodecContext *avctx;
59 int last_frame;
60 AVFloatDSPContext *fdsp;
61 FFTContext mdct_ctx;
62 AudioFrameQueue afq;
63 DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];
64 DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];
65 DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN]; ///< sample buffer
66 float (*opt )[OPT_SIZE];
67 uint8_t (*path)[OPT_SIZE];
68 } NellyMoserEncodeContext;
69
70 static float pow_table[POW_TABLE_SIZE]; ///< pow(2, -i / 2048.0 - 3.0);
71
/**
 * Starting guess for the base exponent search in get_exponent_greedy().
 *
 * Indexed through find_best() with clip((lrintf(val) >> 8) - 20, 0, 95);
 * each entry is an index into ff_nelly_init_table. find_best() then checks
 * whether the following table entry is a closer match.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
80
/**
 * Starting guess for the per-band delta search in get_exponent_greedy().
 *
 * Indexed through find_best() with clip((lrintf(val) >> 8) + 37, 0, 77);
 * each entry is an index into ff_nelly_delta_table.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
88
/**
 * Starting guesses for coefficient quantization in encode_block().
 *
 * The table is a concatenation of one segment per bit count n = 1..6; the
 * segment for n covers indices quant_lut_offset[n] .. quant_lut_offset[n + 1] - 1.
 * Every segment ends at 2^n - 2, so the "try the next entry" step in
 * encode_block() reaches at most 2^n - 1.
 */
static const uint8_t quant_lut[230] = {
    /* 1 bit */
     0,

    /* 2 bits */
     0,  1,  2,

    /* 3 bits */
     0,  1,  2,  3,  4,  5,  6,

    /* 4 bits */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* 5 bits */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* 6 bits */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
115
/*
 * Per-bit-count parameters of the quant_lut lookup in encode_block(); all
 * three tables are indexed by the number of bits given to a coefficient.
 *
 *   position = clip(coeff * quant_lut_mul[n] + quant_lut_add[n],
 *                   quant_lut_offset[n], quant_lut_offset[n + 1] - 1)
 */

/** Scale applied to a coefficient before the lookup. */
static const float quant_lut_mul[7] = {
    0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6
};

/** Bias added after scaling. */
static const float quant_lut_add[7] = {
    0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0
};

/** First quant_lut index of each segment; the last entry is the table size. */
static const uint8_t quant_lut_offset[8] = {
    0, 0, 1, 4, 11, 32, 81, 230
};
119
/**
 * Window the buffered samples and run the two MDCTs of one frame.
 *
 * s->buf is treated as three consecutive runs of NELLY_BUF_LEN samples.
 * Transform k (k = 0, 1) consumes runs k and k + 1: run k is multiplied by
 * ff_sine_128 with vector_fmul(), run k + 1 with vector_fmul_reverse(), and
 * the transform of the windowed data is stored at
 * s->mdct_out + k * NELLY_BUF_LEN.
 *
 * @param s encoder context; s->in_buff is overwritten as scratch space
 */
static void apply_mdct(NellyMoserEncodeContext *s)
{
    for (int blk = 0; blk < 2; blk++) {
        const float *head = s->buf + blk * NELLY_BUF_LEN;
        const float *tail = head + NELLY_BUF_LEN;

        s->fdsp->vector_fmul(s->in_buff, head, ff_sine_128, NELLY_BUF_LEN);
        s->fdsp->vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, tail,
                                     ff_sine_128, NELLY_BUF_LEN);
        s->mdct_ctx.mdct_calc(&s->mdct_ctx,
                              s->mdct_out + blk * NELLY_BUF_LEN, s->in_buff);
    }
}
134
encode_end(AVCodecContext * avctx)135 static av_cold int encode_end(AVCodecContext *avctx)
136 {
137 NellyMoserEncodeContext *s = avctx->priv_data;
138
139 ff_mdct_end(&s->mdct_ctx);
140
141 av_freep(&s->opt);
142 av_freep(&s->path);
143 ff_af_queue_close(&s->afq);
144 av_freep(&s->fdsp);
145
146 return 0;
147 }
148
nellymoser_init_static(void)149 static av_cold void nellymoser_init_static(void)
150 {
151 /* faster way of doing
152 for (int i = 0; i < POW_TABLE_SIZE; i++)
153 pow_table[i] = 2^(-i / 2048.0 - 3.0 + POW_TABLE_OFFSET); */
154 pow_table[0] = 1;
155 pow_table[1024] = M_SQRT1_2;
156 for (int i = 1; i < 513; i++) {
157 double tmp = exp2(-i / 2048.0);
158 pow_table[i] = tmp;
159 pow_table[1024-i] = M_SQRT1_2 / tmp;
160 pow_table[1024+i] = tmp * M_SQRT1_2;
161 pow_table[2048-i] = 0.5 / tmp;
162 }
163 /* Generate overlap window */
164 ff_init_ff_sine_windows(7);
165 }
166
encode_init(AVCodecContext * avctx)167 static av_cold int encode_init(AVCodecContext *avctx)
168 {
169 static AVOnce init_static_once = AV_ONCE_INIT;
170 NellyMoserEncodeContext *s = avctx->priv_data;
171 int ret;
172
173 if (avctx->channels != 1) {
174 av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
175 return AVERROR(EINVAL);
176 }
177
178 if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
179 avctx->sample_rate != 11025 &&
180 avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
181 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
182 av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
183 return AVERROR(EINVAL);
184 }
185
186 avctx->frame_size = NELLY_SAMPLES;
187 avctx->initial_padding = NELLY_BUF_LEN;
188 ff_af_queue_init(avctx, &s->afq);
189 s->avctx = avctx;
190 if ((ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0)) < 0)
191 return ret;
192 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
193 if (!s->fdsp)
194 return AVERROR(ENOMEM);
195
196 if (s->avctx->trellis) {
197 s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float ));
198 s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
199 if (!s->opt || !s->path)
200 return AVERROR(ENOMEM);
201 }
202
203 ff_thread_once(&init_static_once, nellymoser_init_static);
204
205 return 0;
206 }
207
208 #define find_best(val, table, LUT, LUT_add, LUT_size) \
209 best_idx = \
210 LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \
211 if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \
212 best_idx++;
213
/**
 * Choose the band exponents one band at a time.
 *
 * Band 0 takes the ff_nelly_init_table entry found by find_best() for
 * cand[0]. Every later band takes the ff_nelly_delta_table entry found for
 * the difference between its target and the running sum of the values
 * chosen so far.
 *
 * @param s         encoder context (not used)
 * @param cand      target value per band, NELLY_BANDS entries
 * @param idx_table receives the chosen table index per band
 */
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int best_idx;   /* assigned by find_best() */
    int level;      /* running sum of the chosen table values */

    /* base exponent */
    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
    idx_table[0] = best_idx;
    level        = ff_nelly_init_table[best_idx];

    for (int b = 1; b < NELLY_BANDS; b++) {
        const float residual = cand[b] - level;

        find_best(residual, ff_nelly_delta_table, sf_delta_lut, 37, 78);
        idx_table[b] = best_idx;
        level       += ff_nelly_delta_table[best_idx];
    }
}
231
/**
 * Squared difference between two values, used as the trellis cost.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number (currently ignored)
 * @return (x - y)^2
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
238
/**
 * Choose the band exponents with a trellis (dynamic programming) search.
 *
 * opt[band][v] holds the smallest accumulated squared error with which the
 * running power value v can be reached at @p band, and path[band][v] the
 * table index that was chosen to get there. Band 0 is seeded from
 * ff_nelly_init_table; each later band extends every reachable state by
 * every ff_nelly_delta_table entry inside a window of half-width q, which
 * is widened (1000, 4000, 16000) until at least one state is reachable.
 * The cheapest state of the last band is then traced back to fill
 * @p idx_table.
 *
 * If no state of the last band is reachable, the result of
 * get_exponent_greedy() is used instead of tracing back from an invalid
 * index.
 *
 * @param s         encoder context; s->opt and s->path must be allocated
 * @param cand      target value per band, NELLY_BANDS entries
 * @param idx_table receives the chosen table index per band
 */
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int i, j, band, best_idx;
    float power_candidate, best_val;

    float  (*opt )[OPT_SIZE] = s->opt;
    uint8_t(*path)[OPT_SIZE] = s->path;

    /* Mark every state of every band as unreachable. */
    for (band = 0; band < NELLY_BANDS; band++) {
        for (i = 0; i < OPT_SIZE; i++)
            opt[band][i] = INFINITY;
    }

    for (i = 0; i < 64; i++) {
        opt [0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
        path[0][ff_nelly_init_table[i]] = i;
    }

    for (band = 1; band < NELLY_BANDS; band++) {
        int q, c = 0;
        float tmp;
        int idx_min, idx_max, idx;
        power_candidate = cand[band];
        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
            idx_min = FFMAX(0, cand[band] - q);
            /* The largest valid state is OPT_SIZE - 1; clamping to OPT_SIZE
             * would allow a write one element past the row.
             * NOTE(review): the upper bound is derived from cand[band - 1]
             * while the lower bound uses cand[band]; kept as it was so the
             * encoder output does not change. */
            idx_max = FFMIN(OPT_SIZE - 1, cand[band - 1] + q);
            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
                if (isinf(opt[band - 1][i]))
                    continue;
                for (j = 0; j < 32; j++) {
                    idx = i + ff_nelly_delta_table[j];
                    /* presumably ff_nelly_delta_table is ascending, so no
                     * later entry can fall back inside the window */
                    if (idx > idx_max)
                        break;
                    if (idx >= idx_min) {
                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
                        if (opt[band][idx] > tmp) {
                            opt[band][idx]  = tmp;
                            path[band][idx] = j;
                            c = 1;
                        }
                    }
                }
            }
        }
        av_assert1(c); //FIXME
    }

    best_val = INFINITY;
    best_idx = -1;
    band = NELLY_BANDS - 1;
    for (i = 0; i < OPT_SIZE; i++) {
        if (best_val > opt[band][i]) {
            best_val = opt[band][i];
            best_idx = i;
        }
    }

    if (best_idx < 0) {
        /* Nothing was reachable (the case the assert above flags): indexing
         * path[][] with -1 would read out of bounds, so use the greedy
         * result instead. */
        get_exponent_greedy(s, cand, idx_table);
        return;
    }

    for (band = NELLY_BANDS - 1; band >= 0; band--) {
        idx_table[band] = path[band][best_idx];
        if (band) {
            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
        }
    }
}
301
302 /**
303 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
304 * @param s encoder context
305 * @param output output buffer
306 * @param output_size size of output buffer
307 */
encode_block(NellyMoserEncodeContext * s,unsigned char * output,int output_size)308 static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
309 {
310 PutBitContext pb;
311 int i, j, band, block, best_idx, power_idx = 0;
312 float power_val, coeff, coeff_sum;
313 float pows[NELLY_FILL_LEN];
314 int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
315 float cand[NELLY_BANDS];
316
317 apply_mdct(s);
318
319 init_put_bits(&pb, output, output_size);
320
321 i = 0;
322 for (band = 0; band < NELLY_BANDS; band++) {
323 coeff_sum = 0;
324 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
325 coeff_sum += s->mdct_out[i ] * s->mdct_out[i ]
326 + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
327 }
328 cand[band] =
329 log2(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0;
330 }
331
332 if (s->avctx->trellis) {
333 get_exponent_dynamic(s, cand, idx_table);
334 } else {
335 get_exponent_greedy(s, cand, idx_table);
336 }
337
338 i = 0;
339 for (band = 0; band < NELLY_BANDS; band++) {
340 if (band) {
341 power_idx += ff_nelly_delta_table[idx_table[band]];
342 put_bits(&pb, 5, idx_table[band]);
343 } else {
344 power_idx = ff_nelly_init_table[idx_table[0]];
345 put_bits(&pb, 6, idx_table[0]);
346 }
347 power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
348 for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
349 s->mdct_out[i] *= power_val;
350 s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
351 pows[i] = power_idx;
352 }
353 }
354
355 ff_nelly_get_sample_bits(pows, bits);
356
357 for (block = 0; block < 2; block++) {
358 for (i = 0; i < NELLY_FILL_LEN; i++) {
359 if (bits[i] > 0) {
360 const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
361 coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
362 best_idx =
363 quant_lut[av_clip (
364 coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
365 quant_lut_offset[bits[i]],
366 quant_lut_offset[bits[i]+1] - 1
367 )];
368 if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
369 best_idx++;
370
371 put_bits(&pb, bits[i], best_idx);
372 }
373 }
374 if (!block)
375 put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
376 }
377
378 flush_put_bits(&pb);
379 memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
380 }
381
/**
 * Codec encode callback: turn one input frame into one packet.
 *
 * The last NELLY_BUF_LEN-sized part of the previous input is moved to the
 * front of s->buf and the new samples are placed behind it, zero-padded to
 * NELLY_SAMPLES. s->last_frame is set when the encoder is flushed
 * (@p frame is NULL) or when a short frame with at least NELLY_BUF_LEN
 * samples arrives; once it is set, later calls return without producing a
 * packet.
 *
 * @param avctx          codec context
 * @param avpkt          packet to fill; NELLY_BLOCK_LEN bytes are allocated
 * @param frame          input samples, or NULL to flush
 * @param got_packet_ptr set to 1 when a packet was written
 * @return 0 on success, or the error returned by ff_af_queue_add() or
 *         ff_alloc_packet2()
 */
static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                        const AVFrame *frame, int *got_packet_ptr)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    float *const fresh = s->buf + NELLY_BUF_LEN;
    int ret;

    if (s->last_frame)
        return 0;

    /* Keep the tail of the previous input as the overlap part. */
    memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));

    if (!frame) {
        /* Flushing: encode silence behind the remaining overlap. */
        memset(fresh, 0, NELLY_SAMPLES * sizeof(*s->buf));
        s->last_frame = 1;
    } else {
        const int n = frame->nb_samples;

        memcpy(fresh, frame->data[0], n * sizeof(*s->buf));
        if (n < NELLY_SAMPLES) {
            memset(fresh + n, 0, (NELLY_SAMPLES - n) * sizeof(*s->buf));
            if (n >= NELLY_BUF_LEN)
                s->last_frame = 1;
        }

        ret = ff_af_queue_add(&s->afq, frame);
        if (ret < 0)
            return ret;
    }

    ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN, 0);
    if (ret < 0)
        return ret;

    encode_block(s, avpkt->data, avpkt->size);

    /* Get the next frame pts/duration */
    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
                       &avpkt->duration);

    *got_packet_ptr = 1;
    return 0;
}
419
420 AVCodec ff_nellymoser_encoder = {
421 .name = "nellymoser",
422 .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
423 .type = AVMEDIA_TYPE_AUDIO,
424 .id = AV_CODEC_ID_NELLYMOSER,
425 .priv_data_size = sizeof(NellyMoserEncodeContext),
426 .init = encode_init,
427 .encode2 = encode_frame,
428 .close = encode_end,
429 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
430 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
431 AV_SAMPLE_FMT_NONE },
432 .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
433 };
434