1 /*
2 * MP3 huffman table selecting and bit counting
3 *
4 * Copyright (c) 1999-2005 Takehiro TOMINAGA
5 * Copyright (c) 2002-2005 Gabriel Bouvigne
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 */
22
23 /* $Id$ */
24
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28
29
30 #include "lame.h"
31 #include "machine.h"
32 #include "encoder.h"
33 #include "util.h"
34 #include "quantize_pvt.h"
35 #include "tables.h"
36
37
38 static const struct {
39 const int region0_count;
40 const int region1_count;
41 } subdv_table[23] = {
42 {
43 0, 0}, /* 0 bands */
44 {
45 0, 0}, /* 1 bands */
46 {
47 0, 0}, /* 2 bands */
48 {
49 0, 0}, /* 3 bands */
50 {
51 0, 0}, /* 4 bands */
52 {
53 0, 1}, /* 5 bands */
54 {
55 1, 1}, /* 6 bands */
56 {
57 1, 1}, /* 7 bands */
58 {
59 1, 2}, /* 8 bands */
60 {
61 2, 2}, /* 9 bands */
62 {
63 2, 3}, /* 10 bands */
64 {
65 2, 3}, /* 11 bands */
66 {
67 3, 4}, /* 12 bands */
68 {
69 3, 4}, /* 13 bands */
70 {
71 3, 4}, /* 14 bands */
72 {
73 4, 5}, /* 15 bands */
74 {
75 4, 5}, /* 16 bands */
76 {
77 4, 6}, /* 17 bands */
78 {
79 5, 6}, /* 18 bands */
80 {
81 5, 6}, /* 19 bands */
82 {
83 5, 7}, /* 20 bands */
84 {
85 6, 7}, /* 21 bands */
86 {
87 6, 7}, /* 22 bands */
88 };
89
90
91
92
93
94 /*********************************************************************
95 * nonlinear quantization of xr
96 * More accurate formula than the ISO formula. Takes into account
97 * the fact that we are quantizing xr -> ix, but we want ix^4/3 to be
98 * as close as possible to x^4/3. (taking the nearest int would mean
99 * ix is as close as possible to xr, which is different.)
100 *
101 * From Segher Boessenkool <segher@eastsite.nl> 11/1999
102 *
103 * 09/2000: ASM code removed in favor of IEEE754 hack by Takehiro
104 * Tominaga. If you need the ASM code, check CVS circa Aug 2000.
105 *
106 * 01/2004: Optimizations by Gabriel Bouvigne
107 *********************************************************************/
108
109
110
111
112
113 static void
quantize_lines_xrpow_01(unsigned int l,FLOAT istep,const FLOAT * xr,int * ix)114 quantize_lines_xrpow_01(unsigned int l, FLOAT istep, const FLOAT * xr, int *ix)
115 {
116 const FLOAT compareval0 = (1.0f - 0.4054f) / istep;
117 unsigned int i;
118
119 assert(l > 0);
120 assert(l % 2 == 0);
121 for (i = 0; i < l; i += 2) {
122 FLOAT const xr_0 = xr[i+0];
123 FLOAT const xr_1 = xr[i+1];
124 int const ix_0 = (compareval0 > xr_0) ? 0 : 1;
125 int const ix_1 = (compareval0 > xr_1) ? 0 : 1;
126 ix[i+0] = ix_0;
127 ix[i+1] = ix_1;
128 }
129 }
130
131
132
133 #ifdef TAKEHIRO_IEEE754_HACK
134
135 typedef union {
136 float f;
137 int i;
138 } fi_union;
139
140 #define MAGIC_FLOAT (65536*(128))
141 #define MAGIC_INT 0x4b000000
142
143
144 static void
quantize_lines_xrpow(unsigned int l,FLOAT istep,const FLOAT * xp,int * pi)145 quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT * xp, int *pi)
146 {
147 fi_union *fi;
148 unsigned int remaining;
149
150 assert(l > 0);
151
152 fi = (fi_union *) pi;
153
154 l = l >> 1;
155 remaining = l % 2;
156 l = l >> 1;
157 while (l--) {
158 double x0 = istep * xp[0];
159 double x1 = istep * xp[1];
160 double x2 = istep * xp[2];
161 double x3 = istep * xp[3];
162
163 x0 += MAGIC_FLOAT;
164 fi[0].f = x0;
165 x1 += MAGIC_FLOAT;
166 fi[1].f = x1;
167 x2 += MAGIC_FLOAT;
168 fi[2].f = x2;
169 x3 += MAGIC_FLOAT;
170 fi[3].f = x3;
171
172 fi[0].f = x0 + adj43asm[fi[0].i - MAGIC_INT];
173 fi[1].f = x1 + adj43asm[fi[1].i - MAGIC_INT];
174 fi[2].f = x2 + adj43asm[fi[2].i - MAGIC_INT];
175 fi[3].f = x3 + adj43asm[fi[3].i - MAGIC_INT];
176
177 fi[0].i -= MAGIC_INT;
178 fi[1].i -= MAGIC_INT;
179 fi[2].i -= MAGIC_INT;
180 fi[3].i -= MAGIC_INT;
181 fi += 4;
182 xp += 4;
183 };
184 if (remaining) {
185 double x0 = istep * xp[0];
186 double x1 = istep * xp[1];
187
188 x0 += MAGIC_FLOAT;
189 fi[0].f = x0;
190 x1 += MAGIC_FLOAT;
191 fi[1].f = x1;
192
193 fi[0].f = x0 + adj43asm[fi[0].i - MAGIC_INT];
194 fi[1].f = x1 + adj43asm[fi[1].i - MAGIC_INT];
195
196 fi[0].i -= MAGIC_INT;
197 fi[1].i -= MAGIC_INT;
198 }
199
200 }
201
202
203 #else
204
205 /*********************************************************************
206 * XRPOW_FTOI is a macro to convert floats to ints.
207 * if XRPOW_FTOI(x) = nearest_int(x), then QUANTFAC(x)=adj43asm[x]
208 * ROUNDFAC= -0.0946
209 *
210 * if XRPOW_FTOI(x) = floor(x), then QUANTFAC(x)=asj43[x]
211 * ROUNDFAC=0.4054
212 *
213 * Note: using floor() or (int) is extremely slow. On machines where
214 * the TAKEHIRO_IEEE754_HACK code above does not work, it is worthwile
215 * to write some ASM for XRPOW_FTOI().
216 *********************************************************************/
217 #define XRPOW_FTOI(src,dest) ((dest) = (int)(src))
218 #define QUANTFAC(rx) adj43[rx]
219 #define ROUNDFAC 0.4054
220
221
222 static void
quantize_lines_xrpow(unsigned int l,FLOAT istep,const FLOAT * xr,int * ix)223 quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT * xr, int *ix)
224 {
225 unsigned int remaining;
226
227 assert(l > 0);
228
229 l = l >> 1;
230 remaining = l % 2;
231 l = l >> 1;
232 while (l--) {
233 FLOAT x0, x1, x2, x3;
234 int rx0, rx1, rx2, rx3;
235
236 x0 = *xr++ * istep;
237 x1 = *xr++ * istep;
238 XRPOW_FTOI(x0, rx0);
239 x2 = *xr++ * istep;
240 XRPOW_FTOI(x1, rx1);
241 x3 = *xr++ * istep;
242 XRPOW_FTOI(x2, rx2);
243 x0 += QUANTFAC(rx0);
244 XRPOW_FTOI(x3, rx3);
245 x1 += QUANTFAC(rx1);
246 XRPOW_FTOI(x0, *ix++);
247 x2 += QUANTFAC(rx2);
248 XRPOW_FTOI(x1, *ix++);
249 x3 += QUANTFAC(rx3);
250 XRPOW_FTOI(x2, *ix++);
251 XRPOW_FTOI(x3, *ix++);
252 };
253 if (remaining) {
254 FLOAT x0, x1;
255 int rx0, rx1;
256
257 x0 = *xr++ * istep;
258 x1 = *xr++ * istep;
259 XRPOW_FTOI(x0, rx0);
260 XRPOW_FTOI(x1, rx1);
261 x0 += QUANTFAC(rx0);
262 x1 += QUANTFAC(rx1);
263 XRPOW_FTOI(x0, *ix++);
264 XRPOW_FTOI(x1, *ix++);
265 }
266
267 }
268
269
270
271 #endif
272
273
274
275 /*********************************************************************
276 * Quantization function
277 * This function will select which lines to quantize and call the
278 * proper quantization function
279 *********************************************************************/
280
281 static void
quantize_xrpow(const FLOAT * xp,int * pi,FLOAT istep,gr_info const * const cod_info,calc_noise_data const * prev_noise)282 quantize_xrpow(const FLOAT * xp, int *pi, FLOAT istep, gr_info const *const cod_info,
283 calc_noise_data const *prev_noise)
284 {
285 /* quantize on xr^(3/4) instead of xr */
286 int sfb;
287 int sfbmax;
288 int j = 0;
289 int prev_data_use;
290 int *iData;
291 int accumulate = 0;
292 int accumulate01 = 0;
293 int *acc_iData;
294 const FLOAT *acc_xp;
295
296 iData = pi;
297 acc_xp = xp;
298 acc_iData = iData;
299
300
301 /* Reusing previously computed data does not seems to work if global gain
302 is changed. Finding why it behaves this way would allow to use a cache of
303 previously computed values (let's 10 cached values per sfb) that would
304 probably provide a noticeable speedup */
305 prev_data_use = (prev_noise && (cod_info->global_gain == prev_noise->global_gain));
306
307 if (cod_info->block_type == SHORT_TYPE)
308 sfbmax = 38;
309 else
310 sfbmax = 21;
311
312 for (sfb = 0; sfb <= sfbmax; sfb++) {
313 int step = -1;
314
315 if (prev_data_use || cod_info->block_type == NORM_TYPE) {
316 step =
317 cod_info->global_gain
318 - ((cod_info->scalefac[sfb] + (cod_info->preflag ? pretab[sfb] : 0))
319 << (cod_info->scalefac_scale + 1))
320 - cod_info->subblock_gain[cod_info->window[sfb]] * 8;
321 }
322 assert(cod_info->width[sfb] >= 0);
323 if (prev_data_use && (prev_noise->step[sfb] == step)) {
324 /* do not recompute this part,
325 but compute accumulated lines */
326 if (accumulate) {
327 quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
328 accumulate = 0;
329 }
330 if (accumulate01) {
331 quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
332 accumulate01 = 0;
333 }
334 }
335 else { /*should compute this part */
336 int l;
337 l = cod_info->width[sfb];
338
339 if ((j + cod_info->width[sfb]) > cod_info->max_nonzero_coeff) {
340 /*do not compute upper zero part */
341 int usefullsize;
342 usefullsize = cod_info->max_nonzero_coeff - j + 1;
343 memset(&pi[cod_info->max_nonzero_coeff], 0,
344 sizeof(int) * (576 - cod_info->max_nonzero_coeff));
345 l = usefullsize;
346
347 if (l < 0) {
348 l = 0;
349 }
350
351 /* no need to compute higher sfb values */
352 sfb = sfbmax + 1;
353 }
354
355 /*accumulate lines to quantize */
356 if (!accumulate && !accumulate01) {
357 acc_iData = iData;
358 acc_xp = xp;
359 }
360 if (prev_noise &&
361 prev_noise->sfb_count1 > 0 &&
362 sfb >= prev_noise->sfb_count1 &&
363 prev_noise->step[sfb] > 0 && step >= prev_noise->step[sfb]) {
364
365 if (accumulate) {
366 quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
367 accumulate = 0;
368 acc_iData = iData;
369 acc_xp = xp;
370 }
371 accumulate01 += l;
372 }
373 else {
374 if (accumulate01) {
375 quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
376 accumulate01 = 0;
377 acc_iData = iData;
378 acc_xp = xp;
379 }
380 accumulate += l;
381 }
382
383 if (l <= 0) {
384 /* rh: 20040215
385 * may happen due to "prev_data_use" optimization
386 */
387 if (accumulate01) {
388 quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
389 accumulate01 = 0;
390 }
391 if (accumulate) {
392 quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
393 accumulate = 0;
394 }
395
396 break; /* ends for-loop */
397 }
398 }
399 if (sfb <= sfbmax) {
400 iData += cod_info->width[sfb];
401 xp += cod_info->width[sfb];
402 j += cod_info->width[sfb];
403 }
404 }
405 if (accumulate) { /*last data part */
406 quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
407 accumulate = 0;
408 }
409 if (accumulate01) { /*last data part */
410 quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
411 accumulate01 = 0;
412 }
413
414 }
415
416
417
418
419 /*************************************************************************/
420 /* ix_max */
421 /*************************************************************************/
422
423 static int
ix_max(const int * ix,const int * end)424 ix_max(const int *ix, const int *end)
425 {
426 int max1 = 0, max2 = 0;
427
428 do {
429 int const x1 = *ix++;
430 int const x2 = *ix++;
431 if (max1 < x1)
432 max1 = x1;
433
434 if (max2 < x2)
435 max2 = x2;
436 } while (ix < end);
437 if (max1 < max2)
438 max1 = max2;
439 return max1;
440 }
441
442
443
444
445
446
447
448
449 static int
count_bit_ESC(const int * ix,const int * const end,int t1,const int t2,unsigned int * const s)450 count_bit_ESC(const int *ix, const int *const end, int t1, const int t2, unsigned int *const s)
451 {
452 /* ESC-table is used */
453 unsigned int const linbits = ht[t1].xlen * 65536u + ht[t2].xlen;
454 unsigned int sum = 0, sum2;
455
456 do {
457 unsigned int x = *ix++;
458 unsigned int y = *ix++;
459
460 if (x >= 15u) {
461 x = 15u;
462 sum += linbits;
463 }
464 if (y >= 15u) {
465 y = 15u;
466 sum += linbits;
467 }
468 x <<= 4u;
469 x += y;
470 sum += largetbl[x];
471 } while (ix < end);
472
473 sum2 = sum & 0xffffu;
474 sum >>= 16u;
475
476 if (sum > sum2) {
477 sum = sum2;
478 t1 = t2;
479 }
480
481 *s += sum;
482 return t1;
483 }
484
485
486 static int
count_bit_noESC(const int * ix,const int * end,int mx,unsigned int * s)487 count_bit_noESC(const int *ix, const int *end, int mx, unsigned int *s)
488 {
489 /* No ESC-words */
490 unsigned int sum1 = 0;
491 const uint8_t *const hlen1 = ht[1].hlen;
492 (void) mx;
493
494 do {
495 unsigned int const x0 = *ix++;
496 unsigned int const x1 = *ix++;
497 sum1 += hlen1[ x0+x0 + x1 ];
498 } while (ix < end);
499
500 *s += sum1;
501 return 1;
502 }
503
504
505 static const int huf_tbl_noESC[] = {
506 1, 2, 5, 7, 7, 10, 10, 13, 13, 13, 13, 13, 13, 13, 13
507 };
508
509
510 static int
count_bit_noESC_from2(const int * ix,const int * end,int max,unsigned int * s)511 count_bit_noESC_from2(const int *ix, const int *end, int max, unsigned int *s)
512 {
513 int t1 = huf_tbl_noESC[max - 1];
514 /* No ESC-words */
515 const unsigned int xlen = ht[t1].xlen;
516 uint32_t const* table = (t1 == 2) ? &table23[0] : &table56[0];
517 unsigned int sum = 0, sum2;
518
519 do {
520 unsigned int const x0 = *ix++;
521 unsigned int const x1 = *ix++;
522 sum += table[ x0 * xlen + x1 ];
523 } while (ix < end);
524
525 sum2 = sum & 0xffffu;
526 sum >>= 16u;
527
528 if (sum > sum2) {
529 sum = sum2;
530 t1++;
531 }
532
533 *s += sum;
534 return t1;
535 }
536
537
538 inline static int
count_bit_noESC_from3(const int * ix,const int * end,int max,unsigned int * s)539 count_bit_noESC_from3(const int *ix, const int *end, int max, unsigned int * s)
540 {
541 int t1 = huf_tbl_noESC[max - 1];
542 /* No ESC-words */
543 unsigned int sum1 = 0;
544 unsigned int sum2 = 0;
545 unsigned int sum3 = 0;
546 const unsigned int xlen = ht[t1].xlen;
547 const uint8_t *const hlen1 = ht[t1].hlen;
548 const uint8_t *const hlen2 = ht[t1 + 1].hlen;
549 const uint8_t *const hlen3 = ht[t1 + 2].hlen;
550 int t;
551
552 do {
553 unsigned int x0 = *ix++;
554 unsigned int x1 = *ix++;
555 unsigned int x = x0 * xlen + x1;
556 sum1 += hlen1[x];
557 sum2 += hlen2[x];
558 sum3 += hlen3[x];
559 } while (ix < end);
560
561 t = t1;
562 if (sum1 > sum2) {
563 sum1 = sum2;
564 t++;
565 }
566 if (sum1 > sum3) {
567 sum1 = sum3;
568 t = t1 + 2;
569 }
570 *s += sum1;
571
572 return t;
573 }
574
575
576 /*************************************************************************/
577 /* choose table */
578 /*************************************************************************/
579
580 /*
581 Choose the Huffman table that will encode ix[begin..end] with
582 the fewest bits.
583
584 Note: This code contains knowledge about the sizes and characteristics
585 of the Huffman tables as defined in the IS (Table B.7), and will not work
586 with any arbitrary tables.
587 */
count_bit_null(const int * ix,const int * end,int max,unsigned int * s)588 static int count_bit_null(const int* ix, const int* end, int max, unsigned int* s)
589 {
590 (void) ix;
591 (void) end;
592 (void) max;
593 (void) s;
594 return 0;
595 }
596
597 typedef int (*count_fnc)(const int* ix, const int* end, int max, unsigned int* s);
598
599 static const count_fnc count_fncs[] =
600 { &count_bit_null
601 , &count_bit_noESC
602 , &count_bit_noESC_from2
603 , &count_bit_noESC_from2
604 , &count_bit_noESC_from3
605 , &count_bit_noESC_from3
606 , &count_bit_noESC_from3
607 , &count_bit_noESC_from3
608 , &count_bit_noESC_from3
609 , &count_bit_noESC_from3
610 , &count_bit_noESC_from3
611 , &count_bit_noESC_from3
612 , &count_bit_noESC_from3
613 , &count_bit_noESC_from3
614 , &count_bit_noESC_from3
615 , &count_bit_noESC_from3
616 };
617
618 static int
choose_table_nonMMX(const int * ix,const int * const end,int * const _s)619 choose_table_nonMMX(const int *ix, const int *const end, int *const _s)
620 {
621 unsigned int* s = (unsigned int*)_s;
622 unsigned int max;
623 int choice, choice2;
624 max = ix_max(ix, end);
625
626 if (max <= 15) {
627 return count_fncs[max](ix, end, max, s);
628 }
629 /* try tables with linbits */
630 if (max > IXMAX_VAL) {
631 *s = LARGE_BITS;
632 return -1;
633 }
634 max -= 15u;
635 for (choice2 = 24; choice2 < 32; choice2++) {
636 if (ht[choice2].linmax >= max) {
637 break;
638 }
639 }
640
641 for (choice = choice2 - 8; choice < 24; choice++) {
642 if (ht[choice].linmax >= max) {
643 break;
644 }
645 }
646 return count_bit_ESC(ix, end, choice, choice2, s);
647 }
648
649
650
651 /*************************************************************************/
652 /* count_bit */
653 /*************************************************************************/
654 int
noquant_count_bits(lame_internal_flags const * const gfc,gr_info * const gi,calc_noise_data * prev_noise)655 noquant_count_bits(lame_internal_flags const *const gfc,
656 gr_info * const gi, calc_noise_data * prev_noise)
657 {
658 SessionConfig_t const *const cfg = &gfc->cfg;
659 int bits = 0;
660 int i, a1, a2;
661 int const *const ix = gi->l3_enc;
662
663 i = Min(576, ((gi->max_nonzero_coeff + 2) >> 1) << 1);
664
665 if (prev_noise)
666 prev_noise->sfb_count1 = 0;
667
668 /* Determine count1 region */
669 for (; i > 1; i -= 2)
670 if (ix[i - 1] | ix[i - 2])
671 break;
672 gi->count1 = i;
673
674 /* Determines the number of bits to encode the quadruples. */
675 a1 = a2 = 0;
676 for (; i > 3; i -= 4) {
677 int x4 = ix[i-4];
678 int x3 = ix[i-3];
679 int x2 = ix[i-2];
680 int x1 = ix[i-1];
681 int p;
682 /* hack to check if all values <= 1 */
683 if ((unsigned int) (x4 | x3 | x2 | x1) > 1)
684 break;
685
686 p = ((x4 * 2 + x3) * 2 + x2) * 2 + x1;
687 a1 += t32l[p];
688 a2 += t33l[p];
689 }
690
691 bits = a1;
692 gi->count1table_select = 0;
693 if (a1 > a2) {
694 bits = a2;
695 gi->count1table_select = 1;
696 }
697
698 gi->count1bits = bits;
699 gi->big_values = i;
700 if (i == 0)
701 return bits;
702
703 if (gi->block_type == SHORT_TYPE) {
704 a1 = 3 * gfc->scalefac_band.s[3];
705 if (a1 > gi->big_values)
706 a1 = gi->big_values;
707 a2 = gi->big_values;
708
709 }
710 else if (gi->block_type == NORM_TYPE) {
711 assert(i <= 576); /* bv_scf has 576 entries (0..575) */
712 a1 = gi->region0_count = gfc->sv_qnt.bv_scf[i - 2];
713 a2 = gi->region1_count = gfc->sv_qnt.bv_scf[i - 1];
714
715 assert(a1 + a2 + 2 < SBPSY_l);
716 a2 = gfc->scalefac_band.l[a1 + a2 + 2];
717 a1 = gfc->scalefac_band.l[a1 + 1];
718 if (a2 < i)
719 gi->table_select[2] = gfc->choose_table(ix + a2, ix + i, &bits);
720
721 }
722 else {
723 gi->region0_count = 7;
724 /*gi->region1_count = SBPSY_l - 7 - 1; */
725 gi->region1_count = SBMAX_l - 1 - 7 - 1;
726 a1 = gfc->scalefac_band.l[7 + 1];
727 a2 = i;
728 if (a1 > a2) {
729 a1 = a2;
730 }
731 }
732
733
734 /* have to allow for the case when bigvalues < region0 < region1 */
735 /* (and region0, region1 are ignored) */
736 a1 = Min(a1, i);
737 a2 = Min(a2, i);
738
739 assert(a1 >= 0);
740 assert(a2 >= 0);
741
742 /* Count the number of bits necessary to code the bigvalues region. */
743 if (0 < a1)
744 gi->table_select[0] = gfc->choose_table(ix, ix + a1, &bits);
745 if (a1 < a2)
746 gi->table_select[1] = gfc->choose_table(ix + a1, ix + a2, &bits);
747 if (cfg->use_best_huffman == 2) {
748 gi->part2_3_length = bits;
749 best_huffman_divide(gfc, gi);
750 bits = gi->part2_3_length;
751 }
752
753
754 if (prev_noise) {
755 if (gi->block_type == NORM_TYPE) {
756 int sfb = 0;
757 while (gfc->scalefac_band.l[sfb] < gi->big_values) {
758 sfb++;
759 }
760 prev_noise->sfb_count1 = sfb;
761 }
762 }
763
764 return bits;
765 }
766
767 int
count_bits(lame_internal_flags const * const gfc,const FLOAT * const xr,gr_info * const gi,calc_noise_data * prev_noise)768 count_bits(lame_internal_flags const *const gfc,
769 const FLOAT * const xr, gr_info * const gi, calc_noise_data * prev_noise)
770 {
771 int *const ix = gi->l3_enc;
772
773 /* since quantize_xrpow uses table lookup, we need to check this first: */
774 FLOAT const w = (IXMAX_VAL) / IPOW20(gi->global_gain);
775
776 if (gi->xrpow_max > w)
777 return LARGE_BITS;
778
779 quantize_xrpow(xr, ix, IPOW20(gi->global_gain), gi, prev_noise);
780
781 if (gfc->sv_qnt.substep_shaping & 2) {
782 int sfb, j = 0;
783 /* 0.634521682242439 = 0.5946*2**(.5*0.1875) */
784 int const gain = gi->global_gain + gi->scalefac_scale;
785 const FLOAT roundfac = 0.634521682242439 / IPOW20(gain);
786 for (sfb = 0; sfb < gi->sfbmax; sfb++) {
787 int const width = gi->width[sfb];
788 assert(width >= 0);
789 if (!gfc->sv_qnt.pseudohalf[sfb]) {
790 j += width;
791 }
792 else {
793 int k;
794 for (k = j, j += width; k < j; ++k) {
795 ix[k] = (xr[k] >= roundfac) ? ix[k] : 0;
796 }
797 }
798 }
799 }
800 return noquant_count_bits(gfc, gi, prev_noise);
801 }
802
803 /***********************************************************************
804 re-calculate the best scalefac_compress using scfsi
805 the saved bits are kept in the bit reservoir.
806 **********************************************************************/
807
808
809 inline static void
recalc_divide_init(const lame_internal_flags * const gfc,gr_info const * cod_info,int const * const ix,int r01_bits[],int r01_div[],int r0_tbl[],int r1_tbl[])810 recalc_divide_init(const lame_internal_flags * const gfc,
811 gr_info const *cod_info,
812 int const *const ix, int r01_bits[], int r01_div[], int r0_tbl[], int r1_tbl[])
813 {
814 int r0, r1, bigv, r0t, r1t, bits;
815
816 bigv = cod_info->big_values;
817
818 for (r0 = 0; r0 <= 7 + 15; r0++) {
819 r01_bits[r0] = LARGE_BITS;
820 }
821
822 for (r0 = 0; r0 < 16; r0++) {
823 int const a1 = gfc->scalefac_band.l[r0 + 1];
824 int r0bits;
825 if (a1 >= bigv)
826 break;
827 r0bits = 0;
828 r0t = gfc->choose_table(ix, ix + a1, &r0bits);
829
830 for (r1 = 0; r1 < 8; r1++) {
831 int const a2 = gfc->scalefac_band.l[r0 + r1 + 2];
832 if (a2 >= bigv)
833 break;
834
835 bits = r0bits;
836 r1t = gfc->choose_table(ix + a1, ix + a2, &bits);
837 if (r01_bits[r0 + r1] > bits) {
838 r01_bits[r0 + r1] = bits;
839 r01_div[r0 + r1] = r0;
840 r0_tbl[r0 + r1] = r0t;
841 r1_tbl[r0 + r1] = r1t;
842 }
843 }
844 }
845 }
846
847 inline static void
recalc_divide_sub(const lame_internal_flags * const gfc,const gr_info * cod_info2,gr_info * const gi,const int * const ix,const int r01_bits[],const int r01_div[],const int r0_tbl[],const int r1_tbl[])848 recalc_divide_sub(const lame_internal_flags * const gfc,
849 const gr_info * cod_info2,
850 gr_info * const gi,
851 const int *const ix,
852 const int r01_bits[], const int r01_div[], const int r0_tbl[], const int r1_tbl[])
853 {
854 int bits, r2, a2, bigv, r2t;
855
856 bigv = cod_info2->big_values;
857
858 for (r2 = 2; r2 < SBMAX_l + 1; r2++) {
859 a2 = gfc->scalefac_band.l[r2];
860 if (a2 >= bigv)
861 break;
862
863 bits = r01_bits[r2 - 2] + cod_info2->count1bits;
864 if (gi->part2_3_length <= bits)
865 break;
866
867 r2t = gfc->choose_table(ix + a2, ix + bigv, &bits);
868 if (gi->part2_3_length <= bits)
869 continue;
870
871 memcpy(gi, cod_info2, sizeof(gr_info));
872 gi->part2_3_length = bits;
873 gi->region0_count = r01_div[r2 - 2];
874 gi->region1_count = r2 - 2 - r01_div[r2 - 2];
875 gi->table_select[0] = r0_tbl[r2 - 2];
876 gi->table_select[1] = r1_tbl[r2 - 2];
877 gi->table_select[2] = r2t;
878 }
879 }
880
881
882
883
884 void
best_huffman_divide(const lame_internal_flags * const gfc,gr_info * const gi)885 best_huffman_divide(const lame_internal_flags * const gfc, gr_info * const gi)
886 {
887 SessionConfig_t const *const cfg = &gfc->cfg;
888 int i, a1, a2;
889 gr_info cod_info2;
890 int const *const ix = gi->l3_enc;
891
892 int r01_bits[7 + 15 + 1];
893 int r01_div[7 + 15 + 1];
894 int r0_tbl[7 + 15 + 1];
895 int r1_tbl[7 + 15 + 1];
896
897
898 /* SHORT BLOCK stuff fails for MPEG2 */
899 if (gi->block_type == SHORT_TYPE && cfg->mode_gr == 1)
900 return;
901
902
903 memcpy(&cod_info2, gi, sizeof(gr_info));
904 if (gi->block_type == NORM_TYPE) {
905 recalc_divide_init(gfc, gi, ix, r01_bits, r01_div, r0_tbl, r1_tbl);
906 recalc_divide_sub(gfc, &cod_info2, gi, ix, r01_bits, r01_div, r0_tbl, r1_tbl);
907 }
908
909 i = cod_info2.big_values;
910 if (i == 0 || (unsigned int) (ix[i - 2] | ix[i - 1]) > 1)
911 return;
912
913 i = gi->count1 + 2;
914 if (i > 576)
915 return;
916
917 /* Determines the number of bits to encode the quadruples. */
918 memcpy(&cod_info2, gi, sizeof(gr_info));
919 cod_info2.count1 = i;
920 a1 = a2 = 0;
921
922 assert(i <= 576);
923
924 for (; i > cod_info2.big_values; i -= 4) {
925 int const p = ((ix[i - 4] * 2 + ix[i - 3]) * 2 + ix[i - 2]) * 2 + ix[i - 1];
926 a1 += t32l[p];
927 a2 += t33l[p];
928 }
929 cod_info2.big_values = i;
930
931 cod_info2.count1table_select = 0;
932 if (a1 > a2) {
933 a1 = a2;
934 cod_info2.count1table_select = 1;
935 }
936
937 cod_info2.count1bits = a1;
938
939 if (cod_info2.block_type == NORM_TYPE)
940 recalc_divide_sub(gfc, &cod_info2, gi, ix, r01_bits, r01_div, r0_tbl, r1_tbl);
941 else {
942 /* Count the number of bits necessary to code the bigvalues region. */
943 cod_info2.part2_3_length = a1;
944 a1 = gfc->scalefac_band.l[7 + 1];
945 if (a1 > i) {
946 a1 = i;
947 }
948 if (a1 > 0)
949 cod_info2.table_select[0] =
950 gfc->choose_table(ix, ix + a1, (int *) &cod_info2.part2_3_length);
951 if (i > a1)
952 cod_info2.table_select[1] =
953 gfc->choose_table(ix + a1, ix + i, (int *) &cod_info2.part2_3_length);
954 if (gi->part2_3_length > cod_info2.part2_3_length)
955 memcpy(gi, &cod_info2, sizeof(gr_info));
956 }
957 }
958
959 static const int slen1_n[16] = { 1, 1, 1, 1, 8, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16 };
960 static const int slen2_n[16] = { 1, 2, 4, 8, 1, 2, 4, 8, 2, 4, 8, 2, 4, 8, 4, 8 };
961 const int slen1_tab[16] = { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 };
962 const int slen2_tab[16] = { 0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3 };
963
964 static void
scfsi_calc(int ch,III_side_info_t * l3_side)965 scfsi_calc(int ch, III_side_info_t * l3_side)
966 {
967 unsigned int i;
968 int s1, s2, c1, c2;
969 int sfb;
970 gr_info *const gi = &l3_side->tt[1][ch];
971 gr_info const *const g0 = &l3_side->tt[0][ch];
972
973 for (i = 0; i < (sizeof(scfsi_band) / sizeof(int)) - 1; i++) {
974 for (sfb = scfsi_band[i]; sfb < scfsi_band[i + 1]; sfb++) {
975 if (g0->scalefac[sfb] != gi->scalefac[sfb]
976 && gi->scalefac[sfb] >= 0)
977 break;
978 }
979 if (sfb == scfsi_band[i + 1]) {
980 for (sfb = scfsi_band[i]; sfb < scfsi_band[i + 1]; sfb++) {
981 gi->scalefac[sfb] = -1;
982 }
983 l3_side->scfsi[ch][i] = 1;
984 }
985 }
986
987 s1 = c1 = 0;
988 for (sfb = 0; sfb < 11; sfb++) {
989 if (gi->scalefac[sfb] == -1)
990 continue;
991 c1++;
992 if (s1 < gi->scalefac[sfb])
993 s1 = gi->scalefac[sfb];
994 }
995
996 s2 = c2 = 0;
997 for (; sfb < SBPSY_l; sfb++) {
998 if (gi->scalefac[sfb] == -1)
999 continue;
1000 c2++;
1001 if (s2 < gi->scalefac[sfb])
1002 s2 = gi->scalefac[sfb];
1003 }
1004
1005 for (i = 0; i < 16; i++) {
1006 if (s1 < slen1_n[i] && s2 < slen2_n[i]) {
1007 int const c = slen1_tab[i] * c1 + slen2_tab[i] * c2;
1008 if (gi->part2_length > c) {
1009 gi->part2_length = c;
1010 gi->scalefac_compress = (int)i;
1011 }
1012 }
1013 }
1014 }
1015
1016 /*
1017 Find the optimal way to store the scalefactors.
1018 Only call this routine after final scalefactors have been
1019 chosen and the channel/granule will not be re-encoded.
1020 */
1021 void
best_scalefac_store(const lame_internal_flags * gfc,const int gr,const int ch,III_side_info_t * const l3_side)1022 best_scalefac_store(const lame_internal_flags * gfc,
1023 const int gr, const int ch, III_side_info_t * const l3_side)
1024 {
1025 SessionConfig_t const *const cfg = &gfc->cfg;
1026 /* use scalefac_scale if we can */
1027 gr_info *const gi = &l3_side->tt[gr][ch];
1028 int sfb, i, j, l;
1029 int recalc = 0;
1030
1031 /* remove scalefacs from bands with ix=0. This idea comes
1032 * from the AAC ISO docs. added mt 3/00 */
1033 /* check if l3_enc=0 */
1034 j = 0;
1035 for (sfb = 0; sfb < gi->sfbmax; sfb++) {
1036 int const width = gi->width[sfb];
1037 assert(width >= 0);
1038 for (l = j, j += width; l < j; ++l) {
1039 if (gi->l3_enc[l] != 0)
1040 break;
1041 }
1042 if (l == j)
1043 gi->scalefac[sfb] = recalc = -2; /* anything goes. */
1044 /* only best_scalefac_store and calc_scfsi
1045 * know--and only they should know--about the magic number -2.
1046 */
1047 }
1048
1049 if (!gi->scalefac_scale && !gi->preflag) {
1050 int s = 0;
1051 for (sfb = 0; sfb < gi->sfbmax; sfb++)
1052 if (gi->scalefac[sfb] > 0)
1053 s |= gi->scalefac[sfb];
1054
1055 if (!(s & 1) && s != 0) {
1056 for (sfb = 0; sfb < gi->sfbmax; sfb++)
1057 if (gi->scalefac[sfb] > 0)
1058 gi->scalefac[sfb] >>= 1;
1059
1060 gi->scalefac_scale = recalc = 1;
1061 }
1062 }
1063
1064 if (!gi->preflag && gi->block_type != SHORT_TYPE && cfg->mode_gr == 2) {
1065 for (sfb = 11; sfb < SBPSY_l; sfb++)
1066 if (gi->scalefac[sfb] < pretab[sfb] && gi->scalefac[sfb] != -2)
1067 break;
1068 if (sfb == SBPSY_l) {
1069 for (sfb = 11; sfb < SBPSY_l; sfb++)
1070 if (gi->scalefac[sfb] > 0)
1071 gi->scalefac[sfb] -= pretab[sfb];
1072
1073 gi->preflag = recalc = 1;
1074 }
1075 }
1076
1077 for (i = 0; i < 4; i++)
1078 l3_side->scfsi[ch][i] = 0;
1079
1080 if (cfg->mode_gr == 2 && gr == 1
1081 && l3_side->tt[0][ch].block_type != SHORT_TYPE
1082 && l3_side->tt[1][ch].block_type != SHORT_TYPE) {
1083 scfsi_calc(ch, l3_side);
1084 recalc = 0;
1085 }
1086 for (sfb = 0; sfb < gi->sfbmax; sfb++) {
1087 if (gi->scalefac[sfb] == -2) {
1088 gi->scalefac[sfb] = 0; /* if anything goes, then 0 is a good choice */
1089 }
1090 }
1091 if (recalc) {
1092 (void) scale_bitcount(gfc, gi);
1093 }
1094 }
1095
1096
1097 #ifndef NDEBUG
1098 static int
all_scalefactors_not_negative(int const * scalefac,int n)1099 all_scalefactors_not_negative(int const *scalefac, int n)
1100 {
1101 int i;
1102 for (i = 0; i < n; ++i) {
1103 if (scalefac[i] < 0)
1104 return 0;
1105 }
1106 return 1;
1107 }
1108 #endif
1109
1110
1111 /* number of bits used to encode scalefacs */
1112
1113 /* 18*slen1_tab[i] + 18*slen2_tab[i] */
1114 static const int scale_short[16] = {
1115 0, 18, 36, 54, 54, 36, 54, 72, 54, 72, 90, 72, 90, 108, 108, 126
1116 };
1117
1118 /* 17*slen1_tab[i] + 18*slen2_tab[i] */
1119 static const int scale_mixed[16] = {
1120 0, 18, 36, 54, 51, 35, 53, 71, 52, 70, 88, 69, 87, 105, 104, 122
1121 };
1122
1123 /* 11*slen1_tab[i] + 10*slen2_tab[i] */
1124 static const int scale_long[16] = {
1125 0, 10, 20, 30, 33, 21, 31, 41, 32, 42, 52, 43, 53, 63, 64, 74
1126 };
1127
1128
1129 /*************************************************************************/
1130 /* scale_bitcount */
1131 /*************************************************************************/
1132
1133 /* Also calculates the number of bits necessary to code the scalefactors. */
1134
1135 static int
mpeg1_scale_bitcount(const lame_internal_flags * gfc,gr_info * const cod_info)1136 mpeg1_scale_bitcount(const lame_internal_flags * gfc, gr_info * const cod_info)
1137 {
1138 int k, sfb, max_slen1 = 0, max_slen2 = 0;
1139
1140 /* maximum values */
1141 const int *tab;
1142 int *const scalefac = cod_info->scalefac;
1143
1144 (void) gfc;
1145 assert(all_scalefactors_not_negative(scalefac, cod_info->sfbmax));
1146
1147 if (cod_info->block_type == SHORT_TYPE) {
1148 tab = scale_short;
1149 if (cod_info->mixed_block_flag)
1150 tab = scale_mixed;
1151 }
1152 else { /* block_type == 1,2,or 3 */
1153 tab = scale_long;
1154 if (!cod_info->preflag) {
1155 for (sfb = 11; sfb < SBPSY_l; sfb++)
1156 if (scalefac[sfb] < pretab[sfb])
1157 break;
1158
1159 if (sfb == SBPSY_l) {
1160 cod_info->preflag = 1;
1161 for (sfb = 11; sfb < SBPSY_l; sfb++)
1162 scalefac[sfb] -= pretab[sfb];
1163 }
1164 }
1165 }
1166
1167 for (sfb = 0; sfb < cod_info->sfbdivide; sfb++)
1168 if (max_slen1 < scalefac[sfb])
1169 max_slen1 = scalefac[sfb];
1170
1171 for (; sfb < cod_info->sfbmax; sfb++)
1172 if (max_slen2 < scalefac[sfb])
1173 max_slen2 = scalefac[sfb];
1174
1175 /* from Takehiro TOMINAGA <tominaga@isoternet.org> 10/99
1176 * loop over *all* posible values of scalefac_compress to find the
1177 * one which uses the smallest number of bits. ISO would stop
1178 * at first valid index */
1179 cod_info->part2_length = LARGE_BITS;
1180 for (k = 0; k < 16; k++) {
1181 if (max_slen1 < slen1_n[k] && max_slen2 < slen2_n[k]
1182 && cod_info->part2_length > tab[k]) {
1183 cod_info->part2_length = tab[k];
1184 cod_info->scalefac_compress = k;
1185 }
1186 }
1187 return cod_info->part2_length == LARGE_BITS;
1188 }
1189
1190
1191
1192 /*
1193 table of largest scalefactor values for MPEG2
1194 */
1195 static const int max_range_sfac_tab[6][4] = {
1196 {15, 15, 7, 7},
1197 {15, 15, 7, 0},
1198 {7, 3, 0, 0},
1199 {15, 31, 31, 0},
1200 {7, 7, 7, 0},
1201 {3, 3, 0, 0}
1202 };
1203
1204
1205
1206
1207 /*************************************************************************/
1208 /* scale_bitcount_lsf */
1209 /*************************************************************************/
1210
1211 /* Also counts the number of bits to encode the scalefacs but for MPEG 2 */
1212 /* Lower sampling frequencies (24, 22.05 and 16 kHz.) */
1213
1214 /* This is reverse-engineered from section 2.4.3.2 of the MPEG2 IS, */
1215 /* "Audio Decoding Layer III" */
1216
1217 static int
mpeg2_scale_bitcount(const lame_internal_flags * gfc,gr_info * const cod_info)1218 mpeg2_scale_bitcount(const lame_internal_flags * gfc, gr_info * const cod_info)
1219 {
1220 int table_number, row_in_table, partition, nr_sfb, window, over;
1221 int i, sfb, max_sfac[4];
1222 const int *partition_table;
1223 int const *const scalefac = cod_info->scalefac;
1224
1225 /*
1226 Set partition table. Note that should try to use table one,
1227 but do not yet...
1228 */
1229 if (cod_info->preflag)
1230 table_number = 2;
1231 else
1232 table_number = 0;
1233
1234 for (i = 0; i < 4; i++)
1235 max_sfac[i] = 0;
1236
1237 if (cod_info->block_type == SHORT_TYPE) {
1238 row_in_table = 1;
1239 partition_table = &nr_of_sfb_block[table_number][row_in_table][0];
1240 for (sfb = 0, partition = 0; partition < 4; partition++) {
1241 nr_sfb = partition_table[partition] / 3;
1242 for (i = 0; i < nr_sfb; i++, sfb++)
1243 for (window = 0; window < 3; window++)
1244 if (scalefac[sfb * 3 + window] > max_sfac[partition])
1245 max_sfac[partition] = scalefac[sfb * 3 + window];
1246 }
1247 }
1248 else {
1249 row_in_table = 0;
1250 partition_table = &nr_of_sfb_block[table_number][row_in_table][0];
1251 for (sfb = 0, partition = 0; partition < 4; partition++) {
1252 nr_sfb = partition_table[partition];
1253 for (i = 0; i < nr_sfb; i++, sfb++)
1254 if (scalefac[sfb] > max_sfac[partition])
1255 max_sfac[partition] = scalefac[sfb];
1256 }
1257 }
1258
1259 for (over = 0, partition = 0; partition < 4; partition++) {
1260 if (max_sfac[partition] > max_range_sfac_tab[table_number][partition])
1261 over++;
1262 }
1263 if (!over) {
1264 /*
1265 Since no bands have been over-amplified, we can set scalefac_compress
1266 and slen[] for the formatter
1267 */
1268 static const int log2tab[] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4 };
1269
1270 int slen1, slen2, slen3, slen4;
1271
1272 cod_info->sfb_partition_table = nr_of_sfb_block[table_number][row_in_table];
1273 for (partition = 0; partition < 4; partition++)
1274 cod_info->slen[partition] = log2tab[max_sfac[partition]];
1275
1276 /* set scalefac_compress */
1277 slen1 = cod_info->slen[0];
1278 slen2 = cod_info->slen[1];
1279 slen3 = cod_info->slen[2];
1280 slen4 = cod_info->slen[3];
1281
1282 switch (table_number) {
1283 case 0:
1284 cod_info->scalefac_compress = (((slen1 * 5) + slen2) << 4)
1285 + (slen3 << 2)
1286 + slen4;
1287 break;
1288
1289 case 1:
1290 cod_info->scalefac_compress = 400 + (((slen1 * 5) + slen2) << 2)
1291 + slen3;
1292 break;
1293
1294 case 2:
1295 cod_info->scalefac_compress = 500 + (slen1 * 3) + slen2;
1296 break;
1297
1298 default:
1299 ERRORF(gfc, "intensity stereo not implemented yet\n");
1300 break;
1301 }
1302 }
1303 #ifdef DEBUG
1304 if (over)
1305 ERRORF(gfc, "---WARNING !! Amplification of some bands over limits\n");
1306 #endif
1307 if (!over) {
1308 assert(cod_info->sfb_partition_table);
1309 cod_info->part2_length = 0;
1310 for (partition = 0; partition < 4; partition++)
1311 cod_info->part2_length +=
1312 cod_info->slen[partition] * cod_info->sfb_partition_table[partition];
1313 }
1314 return over;
1315 }
1316
1317
1318 int
scale_bitcount(const lame_internal_flags * gfc,gr_info * cod_info)1319 scale_bitcount(const lame_internal_flags * gfc, gr_info * cod_info)
1320 {
1321 if (gfc->cfg.mode_gr == 2) {
1322 return mpeg1_scale_bitcount(gfc, cod_info);
1323 }
1324 else {
1325 return mpeg2_scale_bitcount(gfc, cod_info);
1326 }
1327 }
1328
1329
1330 #ifdef MMX_choose_table
1331 extern int choose_table_MMX(const int *ix, const int *const end, int *const s);
1332 #endif
1333
1334 void
huffman_init(lame_internal_flags * const gfc)1335 huffman_init(lame_internal_flags * const gfc)
1336 {
1337 int i;
1338
1339 gfc->choose_table = choose_table_nonMMX;
1340
1341 #ifdef MMX_choose_table
1342 if (gfc->CPU_features.MMX) {
1343 gfc->choose_table = choose_table_MMX;
1344 }
1345 #endif
1346
1347 for (i = 2; i <= 576; i += 2) {
1348 int scfb_anz = 0, bv_index;
1349 while (gfc->scalefac_band.l[++scfb_anz] < i);
1350
1351 bv_index = subdv_table[scfb_anz].region0_count;
1352 while (gfc->scalefac_band.l[bv_index + 1] > i)
1353 bv_index--;
1354
1355 if (bv_index < 0) {
1356 /* this is an indication that everything is going to
1357 be encoded as region0: bigvalues < region0 < region1
1358 so lets set region0, region1 to some value larger
1359 than bigvalues */
1360 bv_index = subdv_table[scfb_anz].region0_count;
1361 }
1362
1363 gfc->sv_qnt.bv_scf[i - 2] = bv_index;
1364
1365 bv_index = subdv_table[scfb_anz].region1_count;
1366 while (gfc->scalefac_band.l[bv_index + gfc->sv_qnt.bv_scf[i - 2] + 2] > i)
1367 bv_index--;
1368
1369 if (bv_index < 0) {
1370 bv_index = subdv_table[scfb_anz].region1_count;
1371 }
1372
1373 gfc->sv_qnt.bv_scf[i - 1] = bv_index;
1374 }
1375 }
1376