1 /*
2 * LAME MP3 encoding engine
3 *
4 * Copyright (c) 1999 Mark Taylor
5 * Copyright (c) 2000-2002 Takehiro Tominaga
6 * Copyright (c) 2000-2011 Robert Hegemann
7 * Copyright (c) 2001 Gabriel Bouvigne
8 * Copyright (c) 2001 John Dahlstrom
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public
21 * License along with this library; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
24 */
25
26 /* $Id$ */
27
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31
32
33 #include "lame.h"
34 #include "machine.h"
35 #include "encoder.h"
36 #include "util.h"
37 #include "lame_global_flags.h"
38 #include "newmdct.h"
39 #include "psymodel.h"
40 #include "lame-analysis.h"
41 #include "bitstream.h"
42 #include "VbrTag.h"
43 #include "quantize.h"
44 #include "quantize_pvt.h"
45
46
47
48 /*
49 * auto-adjust of ATH, useful for low volume
50 * Gabriel Bouvigne 3 feb 2001
51 *
52 * modifies some values in
53 * gfp->internal_flags->ATH
54 * (gfc->ATH)
55 */
56 static void
adjust_ATH(lame_internal_flags const * const gfc)57 adjust_ATH(lame_internal_flags const *const gfc)
58 {
59 SessionConfig_t const *const cfg = &gfc->cfg;
60 FLOAT gr2_max, max_pow;
61
62 if (gfc->ATH->use_adjust == 0) {
63 gfc->ATH->adjust_factor = 1.0; /* no adjustment */
64 return;
65 }
66
67 /* jd - 2001 mar 12, 27, jun 30 */
68 /* loudness based on equal loudness curve; */
69 /* use granule with maximum combined loudness */
70 max_pow = gfc->ov_psy.loudness_sq[0][0];
71 gr2_max = gfc->ov_psy.loudness_sq[1][0];
72 if (cfg->channels_out == 2) {
73 max_pow += gfc->ov_psy.loudness_sq[0][1];
74 gr2_max += gfc->ov_psy.loudness_sq[1][1];
75 }
76 else {
77 max_pow += max_pow;
78 gr2_max += gr2_max;
79 }
80 if (cfg->mode_gr == 2) {
81 max_pow = Max(max_pow, gr2_max);
82 }
83 max_pow *= 0.5; /* max_pow approaches 1.0 for full band noise */
84
85 /* jd - 2001 mar 31, jun 30 */
86 /* user tuning of ATH adjustment region */
87 max_pow *= gfc->ATH->aa_sensitivity_p;
88
89 /* adjust ATH depending on range of maximum value
90 */
91
92 /* jd - 2001 feb27, mar12,20, jun30, jul22 */
93 /* continuous curves based on approximation */
94 /* to GB's original values. */
95 /* For an increase in approximate loudness, */
96 /* set ATH adjust to adjust_limit immediately */
97 /* after a delay of one frame. */
98 /* For a loudness decrease, reduce ATH adjust */
99 /* towards adjust_limit gradually. */
100 /* max_pow is a loudness squared or a power. */
101 if (max_pow > 0.03125) { /* ((1 - 0.000625)/ 31.98) from curve below */
102 if (gfc->ATH->adjust_factor >= 1.0) {
103 gfc->ATH->adjust_factor = 1.0;
104 }
105 else {
106 /* preceding frame has lower ATH adjust; */
107 /* ascend only to the preceding adjust_limit */
108 /* in case there is leading low volume */
109 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
110 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
111 }
112 }
113 gfc->ATH->adjust_limit = 1.0;
114 }
115 else { /* adjustment curve */
116 /* about 32 dB maximum adjust (0.000625) */
117 FLOAT const adj_lim_new = 31.98 * max_pow + 0.000625;
118 if (gfc->ATH->adjust_factor >= adj_lim_new) { /* descend gradually */
119 gfc->ATH->adjust_factor *= adj_lim_new * 0.075 + 0.925;
120 if (gfc->ATH->adjust_factor < adj_lim_new) { /* stop descent */
121 gfc->ATH->adjust_factor = adj_lim_new;
122 }
123 }
124 else { /* ascend */
125 if (gfc->ATH->adjust_limit >= adj_lim_new) {
126 gfc->ATH->adjust_factor = adj_lim_new;
127 }
128 else { /* preceding frame has lower ATH adjust; */
129 /* ascend only to the preceding adjust_limit */
130 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
131 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
132 }
133 }
134 }
135 gfc->ATH->adjust_limit = adj_lim_new;
136 }
137 }
138
139 /***********************************************************************
140 *
141 * some simple statistics
142 *
143 * bitrate index 0: free bitrate -> not allowed in VBR mode
144 * : bitrates, kbps depending on MPEG version
145 * bitrate index 15: forbidden
146 *
147 * mode_ext:
148 * 0: LR
149 * 1: LR-i
150 * 2: MS
151 * 3: MS-i
152 *
153 ***********************************************************************/
154
155 static void
updateStats(lame_internal_flags * const gfc)156 updateStats(lame_internal_flags * const gfc)
157 {
158 SessionConfig_t const *const cfg = &gfc->cfg;
159 EncResult_t *eov = &gfc->ov_enc;
160 int gr, ch;
161 assert(0 <= eov->bitrate_index && eov->bitrate_index < 16);
162 assert(0 <= eov->mode_ext && eov->mode_ext < 4);
163
164 /* count bitrate indices */
165 eov->bitrate_channelmode_hist[eov->bitrate_index][4]++;
166 eov->bitrate_channelmode_hist[15][4]++;
167
168 /* count 'em for every mode extension in case of 2 channel encoding */
169 if (cfg->channels_out == 2) {
170 eov->bitrate_channelmode_hist[eov->bitrate_index][eov->mode_ext]++;
171 eov->bitrate_channelmode_hist[15][eov->mode_ext]++;
172 }
173 for (gr = 0; gr < cfg->mode_gr; ++gr) {
174 for (ch = 0; ch < cfg->channels_out; ++ch) {
175 int bt = gfc->l3_side.tt[gr][ch].block_type;
176 if (gfc->l3_side.tt[gr][ch].mixed_block_flag)
177 bt = 4;
178 eov->bitrate_blocktype_hist[eov->bitrate_index][bt]++;
179 eov->bitrate_blocktype_hist[eov->bitrate_index][5]++;
180 eov->bitrate_blocktype_hist[15][bt]++;
181 eov->bitrate_blocktype_hist[15][5]++;
182 }
183 }
184 }
185
186
187
188
189 static void
lame_encode_frame_init(lame_internal_flags * gfc,const sample_t * const inbuf[2])190 lame_encode_frame_init(lame_internal_flags * gfc, const sample_t *const inbuf[2])
191 {
192 SessionConfig_t const *const cfg = &gfc->cfg;
193
194 int ch, gr;
195
196 if (gfc->lame_encode_frame_init == 0) {
197 sample_t primebuff0[286 + 1152 + 576];
198 sample_t primebuff1[286 + 1152 + 576];
199 int const framesize = 576 * cfg->mode_gr;
200 /* prime the MDCT/polyphase filterbank with a short block */
201 int i, j;
202 gfc->lame_encode_frame_init = 1;
203 memset(primebuff0, 0, sizeof(primebuff0));
204 memset(primebuff1, 0, sizeof(primebuff1));
205 for (i = 0, j = 0; i < 286 + 576 * (1 + cfg->mode_gr); ++i) {
206 if (i < framesize) {
207 primebuff0[i] = 0;
208 if (cfg->channels_out == 2)
209 primebuff1[i] = 0;
210 }
211 else {
212 primebuff0[i] = inbuf[0][j];
213 if (cfg->channels_out == 2)
214 primebuff1[i] = inbuf[1][j];
215 ++j;
216 }
217 }
218 /* polyphase filtering / mdct */
219 for (gr = 0; gr < cfg->mode_gr; gr++) {
220 for (ch = 0; ch < cfg->channels_out; ch++) {
221 gfc->l3_side.tt[gr][ch].block_type = SHORT_TYPE;
222 }
223 }
224 mdct_sub48(gfc, primebuff0, primebuff1);
225
226 /* check FFT will not use a negative starting offset */
227 #if 576 < FFTOFFSET
228 # error FFTOFFSET greater than 576: FFT uses a negative offset
229 #endif
230 /* check if we have enough data for FFT */
231 assert(gfc->sv_enc.mf_size >= (BLKSIZE + framesize - FFTOFFSET));
232 /* check if we have enough data for polyphase filterbank */
233 assert(gfc->sv_enc.mf_size >= (512 + framesize - 32));
234 }
235
236 }
237
238
239
240
241
242
243
244 /************************************************************************
245 *
246 * encodeframe() Layer 3
247 *
248 * encode a single frame
249 *
250 ************************************************************************
251 lame_encode_frame()
252
253
254 gr 0 gr 1
255 inbuf: |--------------|--------------|--------------|
256
257
258 Polyphase (18 windows, each shifted 32)
259 gr 0:
260 window1 <----512---->
261 window18 <----512---->
262
263 gr 1:
264 window1 <----512---->
265 window18 <----512---->
266
267
268
269 MDCT output: |--------------|--------------|--------------|
270
271 FFT's <---------1024---------->
272 <---------1024-------->
273
274
275
276 inbuf = buffer of PCM data size=MP3 framesize
277 encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
278 so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]
279
280 psy-model FFT has a 1 granule delay, so we feed it data for the
281 next granule.
282 FFT is centered over granule: 224+576+224
283 So FFT starts at: 576-224-MDCTDELAY
284
285 MPEG2: FFT ends at: BLKSIZE+576-224-MDCTDELAY (1328)
286 MPEG1: FFT ends at: BLKSIZE+2*576-224-MDCTDELAY (1904)
287
288 MPEG2: polyphase first window: [0..511]
289 18th window: [544..1055] (1056)
290 MPEG1: 36th window: [1120..1631] (1632)
291 data needed: 512+framesize-32
292
293 A close look at newmdct.c shows that the polyphase filterbank
294 only uses data from [0..510] for each window. Perhaps because the window
295 used by the filterbank is zero for the last point, so Takehiro's
296 code doesn't bother to compute with it.
297
298 FFT starts at 576-224-MDCTDELAY (304) = 576-FFTOFFSET
299
300 */
301
/* A 2x2 array of FLOAT.
 * NOTE(review): presumably indexed [granule][channel] like the other
 * [2][2] arrays in this file -- confirm.  The type is not referenced
 * anywhere in the visible part of this file. */
302 typedef FLOAT chgrdata[2][2];
303
304
305 int
lame_encode_mp3_frame(lame_internal_flags * gfc,sample_t const * inbuf_l,sample_t const * inbuf_r,unsigned char * mp3buf,int mp3buf_size)306 lame_encode_mp3_frame( /* Output */
307 lame_internal_flags * gfc, /* Context */
308 sample_t const *inbuf_l, /* Input */
309 sample_t const *inbuf_r, /* Input */
310 unsigned char *mp3buf, /* Output */
311 int mp3buf_size)
312 { /* Output */
313 SessionConfig_t const *const cfg = &gfc->cfg;
314 int mp3count;
315 III_psy_ratio masking_LR[2][2]; /*LR masking & energy */
316 III_psy_ratio masking_MS[2][2]; /*MS masking & energy */
317 const III_psy_ratio (*masking)[2]; /*pointer to selected maskings */
318 const sample_t *inbuf[2];
319
320 FLOAT tot_ener[2][4];
321 FLOAT ms_ener_ratio[2] = { .5, .5 };
322 FLOAT pe[2][2] = { {0., 0.}, {0., 0.} }, pe_MS[2][2] = { {
323 0., 0.}, {
324 0., 0.}};
325 FLOAT (*pe_use)[2];
326
327 int ch, gr;
328
329 inbuf[0] = inbuf_l;
330 inbuf[1] = inbuf_r;
331
332 if (gfc->lame_encode_frame_init == 0) {
333 /*first run? */
334 lame_encode_frame_init(gfc, inbuf);
335
336 }
337
338
339 /********************** padding *****************************/
340 /* padding method as described in
341 * "MPEG-Layer3 / Bitstream Syntax and Decoding"
342 * by Martin Sieler, Ralph Sperschneider
343 *
344 * note: there is no padding for the very first frame
345 *
346 * Robert Hegemann 2000-06-22
347 */
348 gfc->ov_enc.padding = FALSE;
349 if ((gfc->sv_enc.slot_lag -= gfc->sv_enc.frac_SpF) < 0) {
350 gfc->sv_enc.slot_lag += cfg->samplerate_out;
351 gfc->ov_enc.padding = TRUE;
352 }
353
354
355
356 /****************************************
357 * Stage 1: psychoacoustic model *
358 ****************************************/
359
360 {
361 /* psychoacoustic model
362 * psy model has a 1 granule (576) delay that we must compensate for
363 * (mt 6/99).
364 */
365 int ret;
366 const sample_t *bufp[2] = {0, 0}; /* address of beginning of left & right granule */
367 int blocktype[2];
368
369 for (gr = 0; gr < cfg->mode_gr; gr++) {
370
371 for (ch = 0; ch < cfg->channels_out; ch++) {
372 bufp[ch] = &inbuf[ch][576 + gr * 576 - FFTOFFSET];
373 }
374 ret = L3psycho_anal_vbr(gfc, bufp, gr,
375 masking_LR, masking_MS,
376 pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
377 if (ret != 0)
378 return -4;
379
380 if (cfg->mode == JOINT_STEREO) {
381 ms_ener_ratio[gr] = tot_ener[gr][2] + tot_ener[gr][3];
382 if (ms_ener_ratio[gr] > 0)
383 ms_ener_ratio[gr] = tot_ener[gr][3] / ms_ener_ratio[gr];
384 }
385
386 /* block type flags */
387 for (ch = 0; ch < cfg->channels_out; ch++) {
388 gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
389 cod_info->block_type = blocktype[ch];
390 cod_info->mixed_block_flag = 0;
391 }
392 }
393 }
394
395
396 /* auto-adjust of ATH, useful for low volume */
397 adjust_ATH(gfc);
398
399
400 /****************************************
401 * Stage 2: MDCT *
402 ****************************************/
403
404 /* polyphase filtering / mdct */
405 mdct_sub48(gfc, inbuf[0], inbuf[1]);
406
407
408 /****************************************
409 * Stage 3: MS/LR decision *
410 ****************************************/
411
412 /* Here will be selected MS or LR coding of the 2 stereo channels */
413 gfc->ov_enc.mode_ext = MPG_MD_LR_LR;
414
415 if (cfg->force_ms) {
416 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
417 }
418 else if (cfg->mode == JOINT_STEREO) {
419 /* ms_ratio = is scaled, for historical reasons, to look like
420 a ratio of side_channel / total.
421 0 = signal is 100% mono
422 .5 = L & R uncorrelated
423 */
424
425 /* [0] and [1] are the results for the two granules in MPEG-1,
426 * in MPEG-2 it's only a faked averaging of the same value
427 * _prev is the value of the last granule of the previous frame
428 * _next is the value of the first granule of the next frame
429 */
430
431 FLOAT sum_pe_MS = 0;
432 FLOAT sum_pe_LR = 0;
433 for (gr = 0; gr < cfg->mode_gr; gr++) {
434 for (ch = 0; ch < cfg->channels_out; ch++) {
435 sum_pe_MS += pe_MS[gr][ch];
436 sum_pe_LR += pe[gr][ch];
437 }
438 }
439
440 /* based on PE: M/S coding would not use much more bits than L/R */
441 if (sum_pe_MS <= 1.00 * sum_pe_LR) {
442
443 gr_info const *const gi0 = &gfc->l3_side.tt[0][0];
444 gr_info const *const gi1 = &gfc->l3_side.tt[cfg->mode_gr - 1][0];
445
446 if (gi0[0].block_type == gi0[1].block_type && gi1[0].block_type == gi1[1].block_type) {
447
448 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
449 }
450 }
451 }
452
453 /* bit and noise allocation */
454 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
455 masking = (const III_psy_ratio (*)[2])masking_MS; /* use MS masking */
456 pe_use = pe_MS;
457 }
458 else {
459 masking = (const III_psy_ratio (*)[2])masking_LR; /* use LR masking */
460 pe_use = pe;
461 }
462
463
464 /* copy data for MP3 frame analyzer */
465 if (cfg->analysis && gfc->pinfo != NULL) {
466 for (gr = 0; gr < cfg->mode_gr; gr++) {
467 for (ch = 0; ch < cfg->channels_out; ch++) {
468 gfc->pinfo->ms_ratio[gr] = 0;
469 gfc->pinfo->ms_ener_ratio[gr] = ms_ener_ratio[gr];
470 gfc->pinfo->blocktype[gr][ch] = gfc->l3_side.tt[gr][ch].block_type;
471 gfc->pinfo->pe[gr][ch] = pe_use[gr][ch];
472 memcpy(gfc->pinfo->xr[gr][ch], &gfc->l3_side.tt[gr][ch].xr[0], sizeof(FLOAT) * 576);
473 /* in psymodel, LR and MS data was stored in pinfo.
474 switch to MS data: */
475 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
476 gfc->pinfo->ers[gr][ch] = gfc->pinfo->ers[gr][ch + 2];
477 memcpy(gfc->pinfo->energy[gr][ch], gfc->pinfo->energy[gr][ch + 2],
478 sizeof(gfc->pinfo->energy[gr][ch]));
479 }
480 }
481 }
482 }
483
484
485 /****************************************
486 * Stage 4: quantization loop *
487 ****************************************/
488
489 if (cfg->vbr == vbr_off || cfg->vbr == vbr_abr) {
490 static FLOAT const fircoef[9] = {
491 -0.0207887 * 5, -0.0378413 * 5, -0.0432472 * 5, -0.031183 * 5,
492 7.79609e-18 * 5, 0.0467745 * 5, 0.10091 * 5, 0.151365 * 5,
493 0.187098 * 5
494 };
495
496 int i;
497 FLOAT f;
498
499 for (i = 0; i < 18; i++)
500 gfc->sv_enc.pefirbuf[i] = gfc->sv_enc.pefirbuf[i + 1];
501
502 f = 0.0;
503 for (gr = 0; gr < cfg->mode_gr; gr++)
504 for (ch = 0; ch < cfg->channels_out; ch++)
505 f += pe_use[gr][ch];
506 gfc->sv_enc.pefirbuf[18] = f;
507
508 f = gfc->sv_enc.pefirbuf[9];
509 for (i = 0; i < 9; i++)
510 f += (gfc->sv_enc.pefirbuf[i] + gfc->sv_enc.pefirbuf[18 - i]) * fircoef[i];
511
512 f = (670 * 5 * cfg->mode_gr * cfg->channels_out) / f;
513 for (gr = 0; gr < cfg->mode_gr; gr++) {
514 for (ch = 0; ch < cfg->channels_out; ch++) {
515 pe_use[gr][ch] *= f;
516 }
517 }
518 }
519 switch (cfg->vbr)
520 {
521 default:
522 case vbr_off:
523 CBR_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
524 break;
525 case vbr_abr:
526 ABR_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
527 break;
528 case vbr_rh:
529 VBR_old_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
530 break;
531 case vbr_mt:
532 case vbr_mtrh:
533 VBR_new_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
534 break;
535 }
536
537
538 /****************************************
539 * Stage 5: bitstream formatting *
540 ****************************************/
541
542
543 /* write the frame to the bitstream */
544 (void) format_bitstream(gfc);
545
546 /* copy mp3 bit buffer into array */
547 mp3count = copy_buffer(gfc, mp3buf, mp3buf_size, 1);
548
549
550 if (cfg->write_lame_tag) {
551 AddVbrFrame(gfc);
552 }
553
554 if (cfg->analysis && gfc->pinfo != NULL) {
555 int framesize = 576 * cfg->mode_gr;
556 for (ch = 0; ch < cfg->channels_out; ch++) {
557 int j;
558 for (j = 0; j < FFTOFFSET; j++)
559 gfc->pinfo->pcmdata[ch][j] = gfc->pinfo->pcmdata[ch][j + framesize];
560 for (j = FFTOFFSET; j < 1600; j++) {
561 gfc->pinfo->pcmdata[ch][j] = inbuf[ch][j - FFTOFFSET];
562 }
563 }
564 gfc->sv_qnt.masking_lower = 1.0;
565
566 set_frame_pinfo(gfc, masking);
567 }
568
569 ++gfc->ov_enc.frame_number;
570
571 updateStats(gfc);
572
573 return mp3count;
574 }
575