/*
 *      LAME MP3 encoding engine
 *
 *      Copyright (c) 1999 Mark Taylor
 *      Copyright (c) 2000-2002 Takehiro Tominaga
 *      Copyright (c) 2000-2011 Robert Hegemann
 *      Copyright (c) 2001 Gabriel Bouvigne
 *      Copyright (c) 2001 John Dahlstrom
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
25 
26 /* $Id$ */
27 
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31 
32 
33 #include "lame.h"
34 #include "machine.h"
35 #include "encoder.h"
36 #include "util.h"
37 #include "lame_global_flags.h"
38 #include "newmdct.h"
39 #include "psymodel.h"
40 #include "lame-analysis.h"
41 #include "bitstream.h"
42 #include "VbrTag.h"
43 #include "quantize.h"
44 #include "quantize_pvt.h"
45 
46 
47 
48 /*
49  * auto-adjust of ATH, useful for low volume
50  * Gabriel Bouvigne 3 feb 2001
51  *
52  * modifies some values in
53  *   gfp->internal_flags->ATH
54  *   (gfc->ATH)
55  */
56 static void
adjust_ATH(lame_internal_flags const * const gfc)57 adjust_ATH(lame_internal_flags const *const gfc)
58 {
59     SessionConfig_t const *const cfg = &gfc->cfg;
60     FLOAT   gr2_max, max_pow;
61 
62     if (gfc->ATH->use_adjust == 0) {
63         gfc->ATH->adjust_factor = 1.0; /* no adjustment */
64         return;
65     }
66 
67     /* jd - 2001 mar 12, 27, jun 30 */
68     /* loudness based on equal loudness curve; */
69     /* use granule with maximum combined loudness */
70     max_pow = gfc->ov_psy.loudness_sq[0][0];
71     gr2_max = gfc->ov_psy.loudness_sq[1][0];
72     if (cfg->channels_out == 2) {
73         max_pow += gfc->ov_psy.loudness_sq[0][1];
74         gr2_max += gfc->ov_psy.loudness_sq[1][1];
75     }
76     else {
77         max_pow += max_pow;
78         gr2_max += gr2_max;
79     }
80     if (cfg->mode_gr == 2) {
81         max_pow = Max(max_pow, gr2_max);
82     }
83     max_pow *= 0.5;     /* max_pow approaches 1.0 for full band noise */
84 
85     /* jd - 2001 mar 31, jun 30 */
86     /* user tuning of ATH adjustment region */
87     max_pow *= gfc->ATH->aa_sensitivity_p;
88 
89     /*  adjust ATH depending on range of maximum value
90      */
91 
92     /* jd - 2001 feb27, mar12,20, jun30, jul22 */
93     /* continuous curves based on approximation */
94     /* to GB's original values. */
95     /* For an increase in approximate loudness, */
96     /* set ATH adjust to adjust_limit immediately */
97     /* after a delay of one frame. */
98     /* For a loudness decrease, reduce ATH adjust */
99     /* towards adjust_limit gradually. */
100     /* max_pow is a loudness squared or a power. */
101     if (max_pow > 0.03125) { /* ((1 - 0.000625)/ 31.98) from curve below */
102         if (gfc->ATH->adjust_factor >= 1.0) {
103             gfc->ATH->adjust_factor = 1.0;
104         }
105         else {
106             /* preceding frame has lower ATH adjust; */
107             /* ascend only to the preceding adjust_limit */
108             /* in case there is leading low volume */
109             if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
110                 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
111             }
112         }
113         gfc->ATH->adjust_limit = 1.0;
114     }
115     else {              /* adjustment curve */
116         /* about 32 dB maximum adjust (0.000625) */
117         FLOAT const adj_lim_new = 31.98 * max_pow + 0.000625;
118         if (gfc->ATH->adjust_factor >= adj_lim_new) { /* descend gradually */
119             gfc->ATH->adjust_factor *= adj_lim_new * 0.075 + 0.925;
120             if (gfc->ATH->adjust_factor < adj_lim_new) { /* stop descent */
121                 gfc->ATH->adjust_factor = adj_lim_new;
122             }
123         }
124         else {          /* ascend */
125             if (gfc->ATH->adjust_limit >= adj_lim_new) {
126                 gfc->ATH->adjust_factor = adj_lim_new;
127             }
128             else {      /* preceding frame has lower ATH adjust; */
129                 /* ascend only to the preceding adjust_limit */
130                 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
131                     gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
132                 }
133             }
134         }
135         gfc->ATH->adjust_limit = adj_lim_new;
136     }
137 }
138 
139 /***********************************************************************
140  *
141  *  some simple statistics
142  *
143  *  bitrate index 0: free bitrate -> not allowed in VBR mode
144  *  : bitrates, kbps depending on MPEG version
145  *  bitrate index 15: forbidden
146  *
147  *  mode_ext:
148  *  0:  LR
149  *  1:  LR-i
150  *  2:  MS
151  *  3:  MS-i
152  *
153  ***********************************************************************/
154 
155 static void
updateStats(lame_internal_flags * const gfc)156 updateStats(lame_internal_flags * const gfc)
157 {
158     SessionConfig_t const *const cfg = &gfc->cfg;
159     EncResult_t *eov = &gfc->ov_enc;
160     int     gr, ch;
161     assert(0 <= eov->bitrate_index && eov->bitrate_index < 16);
162     assert(0 <= eov->mode_ext && eov->mode_ext < 4);
163 
164     /* count bitrate indices */
165     eov->bitrate_channelmode_hist[eov->bitrate_index][4]++;
166     eov->bitrate_channelmode_hist[15][4]++;
167 
168     /* count 'em for every mode extension in case of 2 channel encoding */
169     if (cfg->channels_out == 2) {
170         eov->bitrate_channelmode_hist[eov->bitrate_index][eov->mode_ext]++;
171         eov->bitrate_channelmode_hist[15][eov->mode_ext]++;
172     }
173     for (gr = 0; gr < cfg->mode_gr; ++gr) {
174         for (ch = 0; ch < cfg->channels_out; ++ch) {
175             int     bt = gfc->l3_side.tt[gr][ch].block_type;
176             if (gfc->l3_side.tt[gr][ch].mixed_block_flag)
177                 bt = 4;
178             eov->bitrate_blocktype_hist[eov->bitrate_index][bt]++;
179             eov->bitrate_blocktype_hist[eov->bitrate_index][5]++;
180             eov->bitrate_blocktype_hist[15][bt]++;
181             eov->bitrate_blocktype_hist[15][5]++;
182         }
183     }
184 }
185 
186 
187 
188 
189 static void
lame_encode_frame_init(lame_internal_flags * gfc,const sample_t * const inbuf[2])190 lame_encode_frame_init(lame_internal_flags * gfc, const sample_t *const inbuf[2])
191 {
192     SessionConfig_t const *const cfg = &gfc->cfg;
193 
194     int     ch, gr;
195 
196     if (gfc->lame_encode_frame_init == 0) {
197         sample_t primebuff0[286 + 1152 + 576];
198         sample_t primebuff1[286 + 1152 + 576];
199         int const framesize = 576 * cfg->mode_gr;
200         /* prime the MDCT/polyphase filterbank with a short block */
201         int     i, j;
202         gfc->lame_encode_frame_init = 1;
203         memset(primebuff0, 0, sizeof(primebuff0));
204         memset(primebuff1, 0, sizeof(primebuff1));
205         for (i = 0, j = 0; i < 286 + 576 * (1 + cfg->mode_gr); ++i) {
206             if (i < framesize) {
207                 primebuff0[i] = 0;
208                 if (cfg->channels_out == 2)
209                     primebuff1[i] = 0;
210             }
211             else {
212                 primebuff0[i] = inbuf[0][j];
213                 if (cfg->channels_out == 2)
214                     primebuff1[i] = inbuf[1][j];
215                 ++j;
216             }
217         }
218         /* polyphase filtering / mdct */
219         for (gr = 0; gr < cfg->mode_gr; gr++) {
220             for (ch = 0; ch < cfg->channels_out; ch++) {
221                 gfc->l3_side.tt[gr][ch].block_type = SHORT_TYPE;
222             }
223         }
224         mdct_sub48(gfc, primebuff0, primebuff1);
225 
226         /* check FFT will not use a negative starting offset */
227 #if 576 < FFTOFFSET
228 # error FFTOFFSET greater than 576: FFT uses a negative offset
229 #endif
230         /* check if we have enough data for FFT */
231         assert(gfc->sv_enc.mf_size >= (BLKSIZE + framesize - FFTOFFSET));
232         /* check if we have enough data for polyphase filterbank */
233         assert(gfc->sv_enc.mf_size >= (512 + framesize - 32));
234     }
235 
236 }
237 
238 
239 
240 
241 
242 
243 
/************************************************************************
*
* encodeframe()           Layer 3
*
* encode a single frame
*
************************************************************************
lame_encode_frame()


                       gr 0            gr 1
inbuf:           |--------------|--------------|--------------|


Polyphase (18 windows, each shifted 32)
gr 0:
window1          <----512---->
window18                 <----512---->

gr 1:
window1                         <----512---->
window18                                <----512---->



MDCT output:  |--------------|--------------|--------------|

FFT's                    <---------1024---------->
                                         <---------1024-------->



    inbuf = buffer of PCM data size=MP3 framesize
    encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
    so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]

    psy-model FFT has a 1 granule delay, so we feed it data for the
    next granule.
    FFT is centered over granule:  224+576+224
    So FFT starts at:   576-224-MDCTDELAY

    MPEG2:  FFT ends at:  BLKSIZE+576-224-MDCTDELAY      (1328)
    MPEG1:  FFT ends at:  BLKSIZE+2*576-224-MDCTDELAY    (1904)

    MPEG2:  polyphase first window:  [0..511]
                      18th window:   [544..1055]          (1056)
    MPEG1:            36th window:   [1120..1631]         (1632)
            data needed:  512+framesize-32

    A close look at newmdct.c shows that the polyphase filterbank
    only uses data from [0..510] for each window.  Perhaps because the window
    used by the filterbank is zero for the last point, so Takehiro's
    code doesn't bother to compute with it.

    FFT starts at 576-224-MDCTDELAY (304)  = 576-FFTOFFSET

*/
301 
302 typedef FLOAT chgrdata[2][2];
303 
304 
305 int
lame_encode_mp3_frame(lame_internal_flags * gfc,sample_t const * inbuf_l,sample_t const * inbuf_r,unsigned char * mp3buf,int mp3buf_size)306 lame_encode_mp3_frame(       /* Output */
307                          lame_internal_flags * gfc, /* Context */
308                          sample_t const *inbuf_l, /* Input */
309                          sample_t const *inbuf_r, /* Input */
310                          unsigned char *mp3buf, /* Output */
311                          int mp3buf_size)
312 {                       /* Output */
313     SessionConfig_t const *const cfg = &gfc->cfg;
314     int     mp3count;
315     III_psy_ratio masking_LR[2][2]; /*LR masking & energy */
316     III_psy_ratio masking_MS[2][2]; /*MS masking & energy */
317     const III_psy_ratio (*masking)[2]; /*pointer to selected maskings */
318     const sample_t *inbuf[2];
319 
320     FLOAT   tot_ener[2][4];
321     FLOAT   ms_ener_ratio[2] = { .5, .5 };
322     FLOAT   pe[2][2] = { {0., 0.}, {0., 0.} }, pe_MS[2][2] = { {
323     0., 0.}, {
324     0., 0.}};
325     FLOAT (*pe_use)[2];
326 
327     int     ch, gr;
328 
329     inbuf[0] = inbuf_l;
330     inbuf[1] = inbuf_r;
331 
332     if (gfc->lame_encode_frame_init == 0) {
333         /*first run? */
334         lame_encode_frame_init(gfc, inbuf);
335 
336     }
337 
338 
339     /********************** padding *****************************/
340     /* padding method as described in
341      * "MPEG-Layer3 / Bitstream Syntax and Decoding"
342      * by Martin Sieler, Ralph Sperschneider
343      *
344      * note: there is no padding for the very first frame
345      *
346      * Robert Hegemann 2000-06-22
347      */
348     gfc->ov_enc.padding = FALSE;
349     if ((gfc->sv_enc.slot_lag -= gfc->sv_enc.frac_SpF) < 0) {
350         gfc->sv_enc.slot_lag += cfg->samplerate_out;
351         gfc->ov_enc.padding = TRUE;
352     }
353 
354 
355 
356     /****************************************
357     *   Stage 1: psychoacoustic model       *
358     ****************************************/
359 
360     {
361         /* psychoacoustic model
362          * psy model has a 1 granule (576) delay that we must compensate for
363          * (mt 6/99).
364          */
365         int     ret;
366         const sample_t *bufp[2] = {0, 0}; /* address of beginning of left & right granule */
367         int     blocktype[2];
368 
369         for (gr = 0; gr < cfg->mode_gr; gr++) {
370 
371             for (ch = 0; ch < cfg->channels_out; ch++) {
372                 bufp[ch] = &inbuf[ch][576 + gr * 576 - FFTOFFSET];
373             }
374             ret = L3psycho_anal_vbr(gfc, bufp, gr,
375                                     masking_LR, masking_MS,
376                                     pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
377             if (ret != 0)
378                 return -4;
379 
380             if (cfg->mode == JOINT_STEREO) {
381                 ms_ener_ratio[gr] = tot_ener[gr][2] + tot_ener[gr][3];
382                 if (ms_ener_ratio[gr] > 0)
383                     ms_ener_ratio[gr] = tot_ener[gr][3] / ms_ener_ratio[gr];
384             }
385 
386             /* block type flags */
387             for (ch = 0; ch < cfg->channels_out; ch++) {
388                 gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
389                 cod_info->block_type = blocktype[ch];
390                 cod_info->mixed_block_flag = 0;
391             }
392         }
393     }
394 
395 
396     /* auto-adjust of ATH, useful for low volume */
397     adjust_ATH(gfc);
398 
399 
400     /****************************************
401     *   Stage 2: MDCT                       *
402     ****************************************/
403 
404     /* polyphase filtering / mdct */
405     mdct_sub48(gfc, inbuf[0], inbuf[1]);
406 
407 
408     /****************************************
409     *   Stage 3: MS/LR decision             *
410     ****************************************/
411 
412     /* Here will be selected MS or LR coding of the 2 stereo channels */
413     gfc->ov_enc.mode_ext = MPG_MD_LR_LR;
414 
415     if (cfg->force_ms) {
416         gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
417     }
418     else if (cfg->mode == JOINT_STEREO) {
419         /* ms_ratio = is scaled, for historical reasons, to look like
420            a ratio of side_channel / total.
421            0 = signal is 100% mono
422            .5 = L & R uncorrelated
423          */
424 
425         /* [0] and [1] are the results for the two granules in MPEG-1,
426          * in MPEG-2 it's only a faked averaging of the same value
427          * _prev is the value of the last granule of the previous frame
428          * _next is the value of the first granule of the next frame
429          */
430 
431         FLOAT   sum_pe_MS = 0;
432         FLOAT   sum_pe_LR = 0;
433         for (gr = 0; gr < cfg->mode_gr; gr++) {
434             for (ch = 0; ch < cfg->channels_out; ch++) {
435                 sum_pe_MS += pe_MS[gr][ch];
436                 sum_pe_LR += pe[gr][ch];
437             }
438         }
439 
440         /* based on PE: M/S coding would not use much more bits than L/R */
441         if (sum_pe_MS <= 1.00 * sum_pe_LR) {
442 
443             gr_info const *const gi0 = &gfc->l3_side.tt[0][0];
444             gr_info const *const gi1 = &gfc->l3_side.tt[cfg->mode_gr - 1][0];
445 
446             if (gi0[0].block_type == gi0[1].block_type && gi1[0].block_type == gi1[1].block_type) {
447 
448                 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
449             }
450         }
451     }
452 
453     /* bit and noise allocation */
454     if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
455         masking = (const III_psy_ratio (*)[2])masking_MS; /* use MS masking */
456         pe_use = pe_MS;
457     }
458     else {
459         masking = (const III_psy_ratio (*)[2])masking_LR; /* use LR masking */
460         pe_use = pe;
461     }
462 
463 
464     /* copy data for MP3 frame analyzer */
465     if (cfg->analysis && gfc->pinfo != NULL) {
466         for (gr = 0; gr < cfg->mode_gr; gr++) {
467             for (ch = 0; ch < cfg->channels_out; ch++) {
468                 gfc->pinfo->ms_ratio[gr] = 0;
469                 gfc->pinfo->ms_ener_ratio[gr] = ms_ener_ratio[gr];
470                 gfc->pinfo->blocktype[gr][ch] = gfc->l3_side.tt[gr][ch].block_type;
471                 gfc->pinfo->pe[gr][ch] = pe_use[gr][ch];
472                 memcpy(gfc->pinfo->xr[gr][ch], &gfc->l3_side.tt[gr][ch].xr[0], sizeof(FLOAT) * 576);
473                 /* in psymodel, LR and MS data was stored in pinfo.
474                    switch to MS data: */
475                 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
476                     gfc->pinfo->ers[gr][ch] = gfc->pinfo->ers[gr][ch + 2];
477                     memcpy(gfc->pinfo->energy[gr][ch], gfc->pinfo->energy[gr][ch + 2],
478                            sizeof(gfc->pinfo->energy[gr][ch]));
479                 }
480             }
481         }
482     }
483 
484 
485     /****************************************
486     *   Stage 4: quantization loop          *
487     ****************************************/
488 
489     if (cfg->vbr == vbr_off || cfg->vbr == vbr_abr) {
490         static FLOAT const fircoef[9] = {
491             -0.0207887 * 5, -0.0378413 * 5, -0.0432472 * 5, -0.031183 * 5,
492             7.79609e-18 * 5, 0.0467745 * 5, 0.10091 * 5, 0.151365 * 5,
493             0.187098 * 5
494         };
495 
496         int     i;
497         FLOAT   f;
498 
499         for (i = 0; i < 18; i++)
500             gfc->sv_enc.pefirbuf[i] = gfc->sv_enc.pefirbuf[i + 1];
501 
502         f = 0.0;
503         for (gr = 0; gr < cfg->mode_gr; gr++)
504             for (ch = 0; ch < cfg->channels_out; ch++)
505                 f += pe_use[gr][ch];
506         gfc->sv_enc.pefirbuf[18] = f;
507 
508         f = gfc->sv_enc.pefirbuf[9];
509         for (i = 0; i < 9; i++)
510             f += (gfc->sv_enc.pefirbuf[i] + gfc->sv_enc.pefirbuf[18 - i]) * fircoef[i];
511 
512         f = (670 * 5 * cfg->mode_gr * cfg->channels_out) / f;
513         for (gr = 0; gr < cfg->mode_gr; gr++) {
514             for (ch = 0; ch < cfg->channels_out; ch++) {
515                 pe_use[gr][ch] *= f;
516             }
517         }
518     }
519     switch (cfg->vbr)
520     {
521     default:
522     case vbr_off:
523         CBR_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
524         break;
525     case vbr_abr:
526         ABR_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
527         break;
528     case vbr_rh:
529         VBR_old_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
530         break;
531     case vbr_mt:
532     case vbr_mtrh:
533         VBR_new_iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
534         break;
535     }
536 
537 
538     /****************************************
539     *   Stage 5: bitstream formatting       *
540     ****************************************/
541 
542 
543     /*  write the frame to the bitstream  */
544     (void) format_bitstream(gfc);
545 
546     /* copy mp3 bit buffer into array */
547     mp3count = copy_buffer(gfc, mp3buf, mp3buf_size, 1);
548 
549 
550     if (cfg->write_lame_tag) {
551         AddVbrFrame(gfc);
552     }
553 
554     if (cfg->analysis && gfc->pinfo != NULL) {
555         int     framesize = 576 * cfg->mode_gr;
556         for (ch = 0; ch < cfg->channels_out; ch++) {
557             int     j;
558             for (j = 0; j < FFTOFFSET; j++)
559                 gfc->pinfo->pcmdata[ch][j] = gfc->pinfo->pcmdata[ch][j + framesize];
560             for (j = FFTOFFSET; j < 1600; j++) {
561                 gfc->pinfo->pcmdata[ch][j] = inbuf[ch][j - FFTOFFSET];
562             }
563         }
564         gfc->sv_qnt.masking_lower = 1.0;
565 
566         set_frame_pinfo(gfc, masking);
567     }
568 
569     ++gfc->ov_enc.frame_number;
570 
571     updateStats(gfc);
572 
573     return mp3count;
574 }
575