1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Copyright (c) 2008-2009 Gregory Maxwell
4 Written by Jean-Marc Valin and Gregory Maxwell */
5 /*
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #include <math.h>
35 #include "bands.h"
36 #include "modes.h"
37 #include "vq.h"
38 #include "cwrs.h"
39 #include "stack_alloc.h"
40 #include "os_support.h"
41 #include "mathops.h"
42 #include "rate.h"
43 #include "quant_bands.h"
44 #include "pitch.h"
45
hysteresis_decision(opus_val16 val,const opus_val16 * thresholds,const opus_val16 * hysteresis,int N,int prev)46 int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
47 {
48 int i;
49 for (i=0;i<N;i++)
50 {
51 if (val < thresholds[i])
52 break;
53 }
54 if (i>prev && val < thresholds[prev]+hysteresis[prev])
55 i=prev;
56 if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
57 i=prev;
58 return i;
59 }
60
celt_lcg_rand(opus_uint32 seed)61 opus_uint32 celt_lcg_rand(opus_uint32 seed)
62 {
63 return 1664525 * seed + 1013904223;
64 }
65
66 /* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
67 with this approximation is important because it has an impact on the bit allocation */
bitexact_cos(opus_int16 x)68 static opus_int16 bitexact_cos(opus_int16 x)
69 {
70 opus_int32 tmp;
71 opus_int16 x2;
72 tmp = (4096+((opus_int32)(x)*(x)))>>13;
73 celt_assert(tmp<=32767);
74 x2 = tmp;
75 x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
76 celt_assert(x2<=32766);
77 return 1+x2;
78 }
79
bitexact_log2tan(int isin,int icos)80 static int bitexact_log2tan(int isin,int icos)
81 {
82 int lc;
83 int ls;
84 lc=EC_ILOG(icos);
85 ls=EC_ILOG(isin);
86 icos<<=15-lc;
87 isin<<=15-ls;
88 return (ls-lc)*(1<<11)
89 +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
90 -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
91 }
92
93 #ifdef FIXED_POINT
94 /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bandE,int end,int C,int LM)95 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
96 {
97 int i, c, N;
98 const opus_int16 *eBands = m->eBands;
99 N = m->shortMdctSize<<LM;
100 c=0; do {
101 for (i=0;i<end;i++)
102 {
103 int j;
104 opus_val32 maxval=0;
105 opus_val32 sum = 0;
106
107 maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
108 if (maxval > 0)
109 {
110 int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
111 j=eBands[i]<<LM;
112 if (shift>0)
113 {
114 do {
115 sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
116 EXTRACT16(SHR32(X[j+c*N],shift)));
117 } while (++j<eBands[i+1]<<LM);
118 } else {
119 do {
120 sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
121 EXTRACT16(SHL32(X[j+c*N],-shift)));
122 } while (++j<eBands[i+1]<<LM);
123 }
124 /* We're adding one here to ensure the normalized band isn't larger than unity norm */
125 bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
126 } else {
127 bandE[i+c*m->nbEBands] = EPSILON;
128 }
129 /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
130 }
131 } while (++c<C);
132 /*printf ("\n");*/
133 }
134
135 /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * OPUS_RESTRICT freq,celt_norm * OPUS_RESTRICT X,const celt_ener * bandE,int end,int C,int M)136 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
137 {
138 int i, c, N;
139 const opus_int16 *eBands = m->eBands;
140 N = M*m->shortMdctSize;
141 c=0; do {
142 i=0; do {
143 opus_val16 g;
144 int j,shift;
145 opus_val16 E;
146 shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
147 E = VSHR32(bandE[i+c*m->nbEBands], shift);
148 g = EXTRACT16(celt_rcp(SHL32(E,3)));
149 j=M*eBands[i]; do {
150 X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
151 } while (++j<M*eBands[i+1]);
152 } while (++i<end);
153 } while (++c<C);
154 }
155
156 #else /* FIXED_POINT */
157 /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bandE,int end,int C,int LM)158 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
159 {
160 int i, c, N;
161 const opus_int16 *eBands = m->eBands;
162 N = m->shortMdctSize<<LM;
163 c=0; do {
164 for (i=0;i<end;i++)
165 {
166 opus_val32 sum;
167 sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
168 bandE[i+c*m->nbEBands] = celt_sqrt(sum);
169 /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
170 }
171 } while (++c<C);
172 /*printf ("\n");*/
173 }
174
175 /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * OPUS_RESTRICT freq,celt_norm * OPUS_RESTRICT X,const celt_ener * bandE,int end,int C,int M)176 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
177 {
178 int i, c, N;
179 const opus_int16 *eBands = m->eBands;
180 N = M*m->shortMdctSize;
181 c=0; do {
182 for (i=0;i<end;i++)
183 {
184 int j;
185 opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
186 for (j=M*eBands[i];j<M*eBands[i+1];j++)
187 X[j+c*N] = freq[j+c*N]*g;
188 }
189 } while (++c<C);
190 }
191
192 #endif /* FIXED_POINT */
193
194 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
denormalise_bands(const CELTMode * m,const celt_norm * OPUS_RESTRICT X,celt_sig * OPUS_RESTRICT freq,const opus_val16 * bandLogE,int start,int end,int M,int downsample,int silence)195 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
196 celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
197 int end, int M, int downsample, int silence)
198 {
199 int i, N;
200 int bound;
201 celt_sig * OPUS_RESTRICT f;
202 const celt_norm * OPUS_RESTRICT x;
203 const opus_int16 *eBands = m->eBands;
204 N = M*m->shortMdctSize;
205 bound = M*eBands[end];
206 if (downsample!=1)
207 bound = IMIN(bound, N/downsample);
208 if (silence)
209 {
210 bound = 0;
211 start = end = 0;
212 }
213 f = freq;
214 x = X+M*eBands[start];
215 for (i=0;i<M*eBands[start];i++)
216 *f++ = 0;
217 for (i=start;i<end;i++)
218 {
219 int j, band_end;
220 opus_val16 g;
221 opus_val16 lg;
222 #ifdef FIXED_POINT
223 int shift;
224 #endif
225 j=M*eBands[i];
226 band_end = M*eBands[i+1];
227 lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
228 #ifndef FIXED_POINT
229 g = celt_exp2(lg);
230 #else
231 /* Handle the integer part of the log energy */
232 shift = 16-(lg>>DB_SHIFT);
233 if (shift>31)
234 {
235 shift=0;
236 g=0;
237 } else {
238 /* Handle the fractional part. */
239 g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
240 }
241 /* Handle extreme gains with negative shift. */
242 if (shift<0)
243 {
244 /* For shift < -2 we'd be likely to overflow, so we're capping
245 the gain here. This shouldn't happen unless the bitstream is
246 already corrupted. */
247 if (shift < -2)
248 {
249 g = 32767;
250 shift = -2;
251 }
252 do {
253 *f++ = SHL32(MULT16_16(*x++, g), -shift);
254 } while (++j<band_end);
255 } else
256 #endif
257 /* Be careful of the fixed-point "else" just above when changing this code */
258 do {
259 *f++ = SHR32(MULT16_16(*x++, g), shift);
260 } while (++j<band_end);
261 }
262 celt_assert(start <= end);
263 OPUS_CLEAR(&freq[bound], N-bound);
264 }
265
266 /* This prevents energy collapse for transients with multiple short MDCTs */
anti_collapse(const CELTMode * m,celt_norm * X_,unsigned char * collapse_masks,int LM,int C,int size,int start,int end,const opus_val16 * logE,const opus_val16 * prev1logE,const opus_val16 * prev2logE,const int * pulses,opus_uint32 seed,int arch)267 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
268 int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
269 const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
270 {
271 int c, i, j, k;
272 for (i=start;i<end;i++)
273 {
274 int N0;
275 opus_val16 thresh, sqrt_1;
276 int depth;
277 #ifdef FIXED_POINT
278 int shift;
279 opus_val32 thresh32;
280 #endif
281
282 N0 = m->eBands[i+1]-m->eBands[i];
283 /* depth in 1/8 bits */
284 celt_assert(pulses[i]>=0);
285 depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
286
287 #ifdef FIXED_POINT
288 thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
289 thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
290 {
291 opus_val32 t;
292 t = N0<<LM;
293 shift = celt_ilog2(t)>>1;
294 t = SHL32(t, (7-shift)<<1);
295 sqrt_1 = celt_rsqrt_norm(t);
296 }
297 #else
298 thresh = .5f*celt_exp2(-.125f*depth);
299 sqrt_1 = celt_rsqrt(N0<<LM);
300 #endif
301
302 c=0; do
303 {
304 celt_norm *X;
305 opus_val16 prev1;
306 opus_val16 prev2;
307 opus_val32 Ediff;
308 opus_val16 r;
309 int renormalize=0;
310 prev1 = prev1logE[c*m->nbEBands+i];
311 prev2 = prev2logE[c*m->nbEBands+i];
312 if (C==1)
313 {
314 prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
315 prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
316 }
317 Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
318 Ediff = MAX32(0, Ediff);
319
320 #ifdef FIXED_POINT
321 if (Ediff < 16384)
322 {
323 opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
324 r = 2*MIN16(16383,r32);
325 } else {
326 r = 0;
327 }
328 if (LM==3)
329 r = MULT16_16_Q14(23170, MIN32(23169, r));
330 r = SHR16(MIN16(thresh, r),1);
331 r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
332 #else
333 /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
334 short blocks don't have the same energy as long */
335 r = 2.f*celt_exp2(-Ediff);
336 if (LM==3)
337 r *= 1.41421356f;
338 r = MIN16(thresh, r);
339 r = r*sqrt_1;
340 #endif
341 X = X_+c*size+(m->eBands[i]<<LM);
342 for (k=0;k<1<<LM;k++)
343 {
344 /* Detect collapse */
345 if (!(collapse_masks[i*C+c]&1<<k))
346 {
347 /* Fill with noise */
348 for (j=0;j<N0;j++)
349 {
350 seed = celt_lcg_rand(seed);
351 X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
352 }
353 renormalize = 1;
354 }
355 }
356 /* We just added some energy, so we need to renormalise */
357 if (renormalize)
358 renormalise_vector(X, N0<<LM, Q15ONE, arch);
359 } while (++c<C);
360 }
361 }
362
intensity_stereo(const CELTMode * m,celt_norm * OPUS_RESTRICT X,const celt_norm * OPUS_RESTRICT Y,const celt_ener * bandE,int bandID,int N)363 static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
364 {
365 int i = bandID;
366 int j;
367 opus_val16 a1, a2;
368 opus_val16 left, right;
369 opus_val16 norm;
370 #ifdef FIXED_POINT
371 int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
372 #endif
373 left = VSHR32(bandE[i],shift);
374 right = VSHR32(bandE[i+m->nbEBands],shift);
375 norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
376 a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
377 a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
378 for (j=0;j<N;j++)
379 {
380 celt_norm r, l;
381 l = X[j];
382 r = Y[j];
383 X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
384 /* Side is not encoded, no need to calculate */
385 }
386 }
387
stereo_split(celt_norm * OPUS_RESTRICT X,celt_norm * OPUS_RESTRICT Y,int N)388 static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
389 {
390 int j;
391 for (j=0;j<N;j++)
392 {
393 opus_val32 r, l;
394 l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
395 r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
396 X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
397 Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
398 }
399 }
400
stereo_merge(celt_norm * OPUS_RESTRICT X,celt_norm * OPUS_RESTRICT Y,opus_val16 mid,int N,int arch)401 static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
402 {
403 int j;
404 opus_val32 xp=0, side=0;
405 opus_val32 El, Er;
406 opus_val16 mid2;
407 #ifdef FIXED_POINT
408 int kl, kr;
409 #endif
410 opus_val32 t, lgain, rgain;
411
412 /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
413 dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
414 /* Compensating for the mid normalization */
415 xp = MULT16_32_Q15(mid, xp);
416 /* mid and side are in Q15, not Q14 like X and Y */
417 mid2 = SHR16(mid, 1);
418 El = MULT16_16(mid2, mid2) + side - 2*xp;
419 Er = MULT16_16(mid2, mid2) + side + 2*xp;
420 if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
421 {
422 OPUS_COPY(Y, X, N);
423 return;
424 }
425
426 #ifdef FIXED_POINT
427 kl = celt_ilog2(El)>>1;
428 kr = celt_ilog2(Er)>>1;
429 #endif
430 t = VSHR32(El, (kl-7)<<1);
431 lgain = celt_rsqrt_norm(t);
432 t = VSHR32(Er, (kr-7)<<1);
433 rgain = celt_rsqrt_norm(t);
434
435 #ifdef FIXED_POINT
436 if (kl < 7)
437 kl = 7;
438 if (kr < 7)
439 kr = 7;
440 #endif
441
442 for (j=0;j<N;j++)
443 {
444 celt_norm r, l;
445 /* Apply mid scaling (side is already scaled) */
446 l = MULT16_16_P15(mid, X[j]);
447 r = Y[j];
448 X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
449 Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
450 }
451 }
452
453 /* Decide whether we should spread the pulses in the current frame */
spreading_decision(const CELTMode * m,const celt_norm * X,int * average,int last_decision,int * hf_average,int * tapset_decision,int update_hf,int end,int C,int M)454 int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
455 int last_decision, int *hf_average, int *tapset_decision, int update_hf,
456 int end, int C, int M)
457 {
458 int i, c, N0;
459 int sum = 0, nbBands=0;
460 const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
461 int decision;
462 int hf_sum=0;
463
464 celt_assert(end>0);
465
466 N0 = M*m->shortMdctSize;
467
468 if (M*(eBands[end]-eBands[end-1]) <= 8)
469 return SPREAD_NONE;
470 c=0; do {
471 for (i=0;i<end;i++)
472 {
473 int j, N, tmp=0;
474 int tcount[3] = {0,0,0};
475 const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
476 N = M*(eBands[i+1]-eBands[i]);
477 if (N<=8)
478 continue;
479 /* Compute rough CDF of |x[j]| */
480 for (j=0;j<N;j++)
481 {
482 opus_val32 x2N; /* Q13 */
483
484 x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
485 if (x2N < QCONST16(0.25f,13))
486 tcount[0]++;
487 if (x2N < QCONST16(0.0625f,13))
488 tcount[1]++;
489 if (x2N < QCONST16(0.015625f,13))
490 tcount[2]++;
491 }
492
493 /* Only include four last bands (8 kHz and up) */
494 if (i>m->nbEBands-4)
495 hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
496 tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
497 sum += tmp*256;
498 nbBands++;
499 }
500 } while (++c<C);
501
502 if (update_hf)
503 {
504 if (hf_sum)
505 hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
506 *hf_average = (*hf_average+hf_sum)>>1;
507 hf_sum = *hf_average;
508 if (*tapset_decision==2)
509 hf_sum += 4;
510 else if (*tapset_decision==0)
511 hf_sum -= 4;
512 if (hf_sum > 22)
513 *tapset_decision=2;
514 else if (hf_sum > 18)
515 *tapset_decision=1;
516 else
517 *tapset_decision=0;
518 }
519 /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
520 celt_assert(nbBands>0); /* end has to be non-zero */
521 celt_assert(sum>=0);
522 sum = celt_udiv(sum, nbBands);
523 /* Recursive averaging */
524 sum = (sum+*average)>>1;
525 *average = sum;
526 /* Hysteresis */
527 sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
528 if (sum < 80)
529 {
530 decision = SPREAD_AGGRESSIVE;
531 } else if (sum < 256)
532 {
533 decision = SPREAD_NORMAL;
534 } else if (sum < 384)
535 {
536 decision = SPREAD_LIGHT;
537 } else {
538 decision = SPREAD_NONE;
539 }
540 #ifdef FUZZING
541 decision = rand()&0x3;
542 *tapset_decision=rand()%3;
543 #endif
544 return decision;
545 }
546
547 /* Indexing table for converting from natural Hadamard to ordery Hadamard
548 This is essentially a bit-reversed Gray, on top of which we've added
549 an inversion of the order because we want the DC at the end rather than
550 the beginning. The lines are for N=2, 4, 8, 16 */
551 static const int ordery_table[] = {
552 1, 0,
553 3, 0, 2, 1,
554 7, 0, 4, 3, 6, 1, 5, 2,
555 15, 0, 8, 7, 12, 3, 11, 4, 14, 1, 9, 6, 13, 2, 10, 5,
556 };
557
deinterleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)558 static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
559 {
560 int i,j;
561 VARDECL(celt_norm, tmp);
562 int N;
563 SAVE_STACK;
564 N = N0*stride;
565 ALLOC(tmp, N, celt_norm);
566 celt_assert(stride>0);
567 if (hadamard)
568 {
569 const int *ordery = ordery_table+stride-2;
570 for (i=0;i<stride;i++)
571 {
572 for (j=0;j<N0;j++)
573 tmp[ordery[i]*N0+j] = X[j*stride+i];
574 }
575 } else {
576 for (i=0;i<stride;i++)
577 for (j=0;j<N0;j++)
578 tmp[i*N0+j] = X[j*stride+i];
579 }
580 OPUS_COPY(X, tmp, N);
581 RESTORE_STACK;
582 }
583
interleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)584 static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
585 {
586 int i,j;
587 VARDECL(celt_norm, tmp);
588 int N;
589 SAVE_STACK;
590 N = N0*stride;
591 ALLOC(tmp, N, celt_norm);
592 if (hadamard)
593 {
594 const int *ordery = ordery_table+stride-2;
595 for (i=0;i<stride;i++)
596 for (j=0;j<N0;j++)
597 tmp[j*stride+i] = X[ordery[i]*N0+j];
598 } else {
599 for (i=0;i<stride;i++)
600 for (j=0;j<N0;j++)
601 tmp[j*stride+i] = X[i*N0+j];
602 }
603 OPUS_COPY(X, tmp, N);
604 RESTORE_STACK;
605 }
606
haar1(celt_norm * X,int N0,int stride)607 void haar1(celt_norm *X, int N0, int stride)
608 {
609 int i, j;
610 N0 >>= 1;
611 for (i=0;i<stride;i++)
612 for (j=0;j<N0;j++)
613 {
614 opus_val32 tmp1, tmp2;
615 tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
616 tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
617 X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
618 X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
619 }
620 }
621
compute_qn(int N,int b,int offset,int pulse_cap,int stereo)622 static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
623 {
624 static const opus_int16 exp2_table8[8] =
625 {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
626 int qn, qb;
627 int N2 = 2*N-1;
628 if (stereo && N==2)
629 N2--;
630 /* The upper limit ensures that in a stereo split with itheta==16384, we'll
631 always have enough bits left over to code at least one pulse in the
632 side; otherwise it would collapse, since it doesn't get folded. */
633 qb = celt_sudiv(b+N2*offset, N2);
634 qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
635
636 qb = IMIN(8<<BITRES, qb);
637
638 if (qb<(1<<BITRES>>1)) {
639 qn = 1;
640 } else {
641 qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
642 qn = (qn+1)>>1<<1;
643 }
644 celt_assert(qn <= 256);
645 return qn;
646 }
647
648 struct band_ctx {
649 int encode;
650 const CELTMode *m;
651 int i;
652 int intensity;
653 int spread;
654 int tf_change;
655 ec_ctx *ec;
656 opus_int32 remaining_bits;
657 const celt_ener *bandE;
658 opus_uint32 seed;
659 int arch;
660 };
661
662 struct split_ctx {
663 int inv;
664 int imid;
665 int iside;
666 int delta;
667 int itheta;
668 int qalloc;
669 };
670
compute_theta(struct band_ctx * ctx,struct split_ctx * sctx,celt_norm * X,celt_norm * Y,int N,int * b,int B,int B0,int LM,int stereo,int * fill)671 static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
672 celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
673 int LM,
674 int stereo, int *fill)
675 {
676 int qn;
677 int itheta=0;
678 int delta;
679 int imid, iside;
680 int qalloc;
681 int pulse_cap;
682 int offset;
683 opus_int32 tell;
684 int inv=0;
685 int encode;
686 const CELTMode *m;
687 int i;
688 int intensity;
689 ec_ctx *ec;
690 const celt_ener *bandE;
691
692 encode = ctx->encode;
693 m = ctx->m;
694 i = ctx->i;
695 intensity = ctx->intensity;
696 ec = ctx->ec;
697 bandE = ctx->bandE;
698
699 /* Decide on the resolution to give to the split parameter theta */
700 pulse_cap = m->logN[i]+LM*(1<<BITRES);
701 offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
702 qn = compute_qn(N, *b, offset, pulse_cap, stereo);
703 if (stereo && i>=intensity)
704 qn = 1;
705 if (encode)
706 {
707 /* theta is the atan() of the ratio between the (normalized)
708 side and mid. With just that parameter, we can re-scale both
709 mid and side because we know that 1) they have unit norm and
710 2) they are orthogonal. */
711 itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
712 }
713 tell = ec_tell_frac(ec);
714 if (qn!=1)
715 {
716 if (encode)
717 itheta = (itheta*(opus_int32)qn+8192)>>14;
718
719 /* Entropy coding of the angle. We use a uniform pdf for the
720 time split, a step for stereo, and a triangular one for the rest. */
721 if (stereo && N>2)
722 {
723 int p0 = 3;
724 int x = itheta;
725 int x0 = qn/2;
726 int ft = p0*(x0+1) + x0;
727 /* Use a probability of p0 up to itheta=8192 and then use 1 after */
728 if (encode)
729 {
730 ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
731 } else {
732 int fs;
733 fs=ec_decode(ec,ft);
734 if (fs<(x0+1)*p0)
735 x=fs/p0;
736 else
737 x=x0+1+(fs-(x0+1)*p0);
738 ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
739 itheta = x;
740 }
741 } else if (B0>1 || stereo) {
742 /* Uniform pdf */
743 if (encode)
744 ec_enc_uint(ec, itheta, qn+1);
745 else
746 itheta = ec_dec_uint(ec, qn+1);
747 } else {
748 int fs=1, ft;
749 ft = ((qn>>1)+1)*((qn>>1)+1);
750 if (encode)
751 {
752 int fl;
753
754 fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
755 fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
756 ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
757
758 ec_encode(ec, fl, fl+fs, ft);
759 } else {
760 /* Triangular pdf */
761 int fl=0;
762 int fm;
763 fm = ec_decode(ec, ft);
764
765 if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
766 {
767 itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
768 fs = itheta + 1;
769 fl = itheta*(itheta + 1)>>1;
770 }
771 else
772 {
773 itheta = (2*(qn + 1)
774 - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
775 fs = qn + 1 - itheta;
776 fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
777 }
778
779 ec_dec_update(ec, fl, fl+fs, ft);
780 }
781 }
782 celt_assert(itheta>=0);
783 itheta = celt_udiv((opus_int32)itheta*16384, qn);
784 if (encode && stereo)
785 {
786 if (itheta==0)
787 intensity_stereo(m, X, Y, bandE, i, N);
788 else
789 stereo_split(X, Y, N);
790 }
791 /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
792 Let's do that at higher complexity */
793 } else if (stereo) {
794 if (encode)
795 {
796 inv = itheta > 8192;
797 if (inv)
798 {
799 int j;
800 for (j=0;j<N;j++)
801 Y[j] = -Y[j];
802 }
803 intensity_stereo(m, X, Y, bandE, i, N);
804 }
805 if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
806 {
807 if (encode)
808 ec_enc_bit_logp(ec, inv, 2);
809 else
810 inv = ec_dec_bit_logp(ec, 2);
811 } else
812 inv = 0;
813 itheta = 0;
814 }
815 qalloc = ec_tell_frac(ec) - tell;
816 *b -= qalloc;
817
818 if (itheta == 0)
819 {
820 imid = 32767;
821 iside = 0;
822 *fill &= (1<<B)-1;
823 delta = -16384;
824 } else if (itheta == 16384)
825 {
826 imid = 0;
827 iside = 32767;
828 *fill &= ((1<<B)-1)<<B;
829 delta = 16384;
830 } else {
831 imid = bitexact_cos((opus_int16)itheta);
832 iside = bitexact_cos((opus_int16)(16384-itheta));
833 /* This is the mid vs side allocation that minimizes squared error
834 in that band. */
835 delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
836 }
837
838 sctx->inv = inv;
839 sctx->imid = imid;
840 sctx->iside = iside;
841 sctx->delta = delta;
842 sctx->itheta = itheta;
843 sctx->qalloc = qalloc;
844 }
quant_band_n1(struct band_ctx * ctx,celt_norm * X,celt_norm * Y,int b,celt_norm * lowband_out)845 static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
846 celt_norm *lowband_out)
847 {
848 #ifdef RESYNTH
849 int resynth = 1;
850 #else
851 int resynth = !ctx->encode;
852 #endif
853 int c;
854 int stereo;
855 celt_norm *x = X;
856 int encode;
857 ec_ctx *ec;
858
859 encode = ctx->encode;
860 ec = ctx->ec;
861
862 stereo = Y != NULL;
863 c=0; do {
864 int sign=0;
865 if (ctx->remaining_bits>=1<<BITRES)
866 {
867 if (encode)
868 {
869 sign = x[0]<0;
870 ec_enc_bits(ec, sign, 1);
871 } else {
872 sign = ec_dec_bits(ec, 1);
873 }
874 ctx->remaining_bits -= 1<<BITRES;
875 b-=1<<BITRES;
876 }
877 if (resynth)
878 x[0] = sign ? -NORM_SCALING : NORM_SCALING;
879 x = Y;
880 } while (++c<1+stereo);
881 if (lowband_out)
882 lowband_out[0] = SHR16(X[0],4);
883 return 1;
884 }
885
886 /* This function is responsible for encoding and decoding a mono partition.
887 It can split the band in two and transmit the energy difference with
888 the two half-bands. It can be called recursively so bands can end up being
889 split in 8 parts. */
quant_partition(struct band_ctx * ctx,celt_norm * X,int N,int b,int B,celt_norm * lowband,int LM,opus_val16 gain,int fill)890 static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
891 int N, int b, int B, celt_norm *lowband,
892 int LM,
893 opus_val16 gain, int fill)
894 {
895 const unsigned char *cache;
896 int q;
897 int curr_bits;
898 int imid=0, iside=0;
899 int B0=B;
900 opus_val16 mid=0, side=0;
901 unsigned cm=0;
902 #ifdef RESYNTH
903 int resynth = 1;
904 #else
905 int resynth = !ctx->encode;
906 #endif
907 celt_norm *Y=NULL;
908 int encode;
909 const CELTMode *m;
910 int i;
911 int spread;
912 ec_ctx *ec;
913
914 encode = ctx->encode;
915 m = ctx->m;
916 i = ctx->i;
917 spread = ctx->spread;
918 ec = ctx->ec;
919
920 /* If we need 1.5 more bit than we can produce, split the band in two. */
921 cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
922 if (LM != -1 && b > cache[cache[0]]+12 && N>2)
923 {
924 int mbits, sbits, delta;
925 int itheta;
926 int qalloc;
927 struct split_ctx sctx;
928 celt_norm *next_lowband2=NULL;
929 opus_int32 rebalance;
930
931 N >>= 1;
932 Y = X+N;
933 LM -= 1;
934 if (B==1)
935 fill = (fill&1)|(fill<<1);
936 B = (B+1)>>1;
937
938 compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
939 LM, 0, &fill);
940 imid = sctx.imid;
941 iside = sctx.iside;
942 delta = sctx.delta;
943 itheta = sctx.itheta;
944 qalloc = sctx.qalloc;
945 #ifdef FIXED_POINT
946 mid = imid;
947 side = iside;
948 #else
949 mid = (1.f/32768)*imid;
950 side = (1.f/32768)*iside;
951 #endif
952
953 /* Give more bits to low-energy MDCTs than they would otherwise deserve */
954 if (B0>1 && (itheta&0x3fff))
955 {
956 if (itheta > 8192)
957 /* Rough approximation for pre-echo masking */
958 delta -= delta>>(4-LM);
959 else
960 /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
961 delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
962 }
963 mbits = IMAX(0, IMIN(b, (b-delta)/2));
964 sbits = b-mbits;
965 ctx->remaining_bits -= qalloc;
966
967 if (lowband)
968 next_lowband2 = lowband+N; /* >32-bit split case */
969
970 rebalance = ctx->remaining_bits;
971 if (mbits >= sbits)
972 {
973 cm = quant_partition(ctx, X, N, mbits, B,
974 lowband, LM,
975 MULT16_16_P15(gain,mid), fill);
976 rebalance = mbits - (rebalance-ctx->remaining_bits);
977 if (rebalance > 3<<BITRES && itheta!=0)
978 sbits += rebalance - (3<<BITRES);
979 cm |= quant_partition(ctx, Y, N, sbits, B,
980 next_lowband2, LM,
981 MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
982 } else {
983 cm = quant_partition(ctx, Y, N, sbits, B,
984 next_lowband2, LM,
985 MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
986 rebalance = sbits - (rebalance-ctx->remaining_bits);
987 if (rebalance > 3<<BITRES && itheta!=16384)
988 mbits += rebalance - (3<<BITRES);
989 cm |= quant_partition(ctx, X, N, mbits, B,
990 lowband, LM,
991 MULT16_16_P15(gain,mid), fill);
992 }
993 } else {
994 /* This is the basic no-split case */
995 q = bits2pulses(m, i, LM, b);
996 curr_bits = pulses2bits(m, i, LM, q);
997 ctx->remaining_bits -= curr_bits;
998
999 /* Ensures we can never bust the budget */
1000 while (ctx->remaining_bits < 0 && q > 0)
1001 {
1002 ctx->remaining_bits += curr_bits;
1003 q--;
1004 curr_bits = pulses2bits(m, i, LM, q);
1005 ctx->remaining_bits -= curr_bits;
1006 }
1007
1008 if (q!=0)
1009 {
1010 int K = get_pulses(q);
1011
1012 /* Finally do the actual quantization */
1013 if (encode)
1014 {
1015 cm = alg_quant(X, N, K, spread, B, ec
1016 #ifdef RESYNTH
1017 , gain
1018 #endif
1019 );
1020 } else {
1021 cm = alg_unquant(X, N, K, spread, B, ec, gain);
1022 }
1023 } else {
1024 /* If there's no pulse, fill the band anyway */
1025 int j;
1026 if (resynth)
1027 {
1028 unsigned cm_mask;
1029 /* B can be as large as 16, so this shift might overflow an int on a
1030 16-bit platform; use a long to get defined behavior.*/
1031 cm_mask = (unsigned)(1UL<<B)-1;
1032 fill &= cm_mask;
1033 if (!fill)
1034 {
1035 OPUS_CLEAR(X, N);
1036 } else {
1037 if (lowband == NULL)
1038 {
1039 /* Noise */
1040 for (j=0;j<N;j++)
1041 {
1042 ctx->seed = celt_lcg_rand(ctx->seed);
1043 X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
1044 }
1045 cm = cm_mask;
1046 } else {
1047 /* Folded spectrum */
1048 for (j=0;j<N;j++)
1049 {
1050 opus_val16 tmp;
1051 ctx->seed = celt_lcg_rand(ctx->seed);
1052 /* About 48 dB below the "normal" folding level */
1053 tmp = QCONST16(1.0f/256, 10);
1054 tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
1055 X[j] = lowband[j]+tmp;
1056 }
1057 cm = fill;
1058 }
1059 renormalise_vector(X, N, gain, ctx->arch);
1060 }
1061 }
1062 }
1063 }
1064
1065 return cm;
1066 }
1067
1068
1069 /* This function is responsible for encoding and decoding a band for the mono case. */
quant_band(struct band_ctx * ctx,celt_norm * X,int N,int b,int B,celt_norm * lowband,int LM,celt_norm * lowband_out,opus_val16 gain,celt_norm * lowband_scratch,int fill)1070 static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
1071 int N, int b, int B, celt_norm *lowband,
1072 int LM, celt_norm *lowband_out,
1073 opus_val16 gain, celt_norm *lowband_scratch, int fill)
1074 {
1075 int N0=N;
1076 int N_B=N;
1077 int N_B0;
1078 int B0=B;
1079 int time_divide=0;
1080 int recombine=0;
1081 int longBlocks;
1082 unsigned cm=0;
1083 #ifdef RESYNTH
1084 int resynth = 1;
1085 #else
1086 int resynth = !ctx->encode;
1087 #endif
1088 int k;
1089 int encode;
1090 int tf_change;
1091
1092 encode = ctx->encode;
1093 tf_change = ctx->tf_change;
1094
1095 longBlocks = B0==1;
1096
1097 N_B = celt_udiv(N_B, B);
1098
1099 /* Special case for one sample */
1100 if (N==1)
1101 {
1102 return quant_band_n1(ctx, X, NULL, b, lowband_out);
1103 }
1104
1105 if (tf_change>0)
1106 recombine = tf_change;
1107 /* Band recombining to increase frequency resolution */
1108
1109 if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
1110 {
1111 OPUS_COPY(lowband_scratch, lowband, N);
1112 lowband = lowband_scratch;
1113 }
1114
1115 for (k=0;k<recombine;k++)
1116 {
1117 static const unsigned char bit_interleave_table[16]={
1118 0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
1119 };
1120 if (encode)
1121 haar1(X, N>>k, 1<<k);
1122 if (lowband)
1123 haar1(lowband, N>>k, 1<<k);
1124 fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
1125 }
1126 B>>=recombine;
1127 N_B<<=recombine;
1128
1129 /* Increasing the time resolution */
1130 while ((N_B&1) == 0 && tf_change<0)
1131 {
1132 if (encode)
1133 haar1(X, N_B, B);
1134 if (lowband)
1135 haar1(lowband, N_B, B);
1136 fill |= fill<<B;
1137 B <<= 1;
1138 N_B >>= 1;
1139 time_divide++;
1140 tf_change++;
1141 }
1142 B0=B;
1143 N_B0 = N_B;
1144
1145 /* Reorganize the samples in time order instead of frequency order */
1146 if (B0>1)
1147 {
1148 if (encode)
1149 deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1150 if (lowband)
1151 deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
1152 }
1153
1154 cm = quant_partition(ctx, X, N, b, B, lowband,
1155 LM, gain, fill);
1156
1157 /* This code is used by the decoder and by the resynthesis-enabled encoder */
1158 if (resynth)
1159 {
1160 /* Undo the sample reorganization going from time order to frequency order */
1161 if (B0>1)
1162 interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1163
1164 /* Undo time-freq changes that we did earlier */
1165 N_B = N_B0;
1166 B = B0;
1167 for (k=0;k<time_divide;k++)
1168 {
1169 B >>= 1;
1170 N_B <<= 1;
1171 cm |= cm>>B;
1172 haar1(X, N_B, B);
1173 }
1174
1175 for (k=0;k<recombine;k++)
1176 {
1177 static const unsigned char bit_deinterleave_table[16]={
1178 0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
1179 0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
1180 };
1181 cm = bit_deinterleave_table[cm];
1182 haar1(X, N0>>k, 1<<k);
1183 }
1184 B<<=recombine;
1185
1186 /* Scale output for later folding */
1187 if (lowband_out)
1188 {
1189 int j;
1190 opus_val16 n;
1191 n = celt_sqrt(SHL32(EXTEND32(N0),22));
1192 for (j=0;j<N0;j++)
1193 lowband_out[j] = MULT16_16_Q15(n,X[j]);
1194 }
1195 cm &= (1<<B)-1;
1196 }
1197 return cm;
1198 }
1199
1200
1201 /* This function is responsible for encoding and decoding a band for the stereo case. */
quant_band_stereo(struct band_ctx * ctx,celt_norm * X,celt_norm * Y,int N,int b,int B,celt_norm * lowband,int LM,celt_norm * lowband_out,celt_norm * lowband_scratch,int fill)1202 static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
1203 int N, int b, int B, celt_norm *lowband,
1204 int LM, celt_norm *lowband_out,
1205 celt_norm *lowband_scratch, int fill)
1206 {
1207 int imid=0, iside=0;
1208 int inv = 0;
1209 opus_val16 mid=0, side=0;
1210 unsigned cm=0;
1211 #ifdef RESYNTH
1212 int resynth = 1;
1213 #else
1214 int resynth = !ctx->encode;
1215 #endif
1216 int mbits, sbits, delta;
1217 int itheta;
1218 int qalloc;
1219 struct split_ctx sctx;
1220 int orig_fill;
1221 int encode;
1222 ec_ctx *ec;
1223
1224 encode = ctx->encode;
1225 ec = ctx->ec;
1226
1227 /* Special case for one sample */
1228 if (N==1)
1229 {
1230 return quant_band_n1(ctx, X, Y, b, lowband_out);
1231 }
1232
1233 orig_fill = fill;
1234
1235 compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
1236 LM, 1, &fill);
1237 inv = sctx.inv;
1238 imid = sctx.imid;
1239 iside = sctx.iside;
1240 delta = sctx.delta;
1241 itheta = sctx.itheta;
1242 qalloc = sctx.qalloc;
1243 #ifdef FIXED_POINT
1244 mid = imid;
1245 side = iside;
1246 #else
1247 mid = (1.f/32768)*imid;
1248 side = (1.f/32768)*iside;
1249 #endif
1250
1251 /* This is a special case for N=2 that only works for stereo and takes
1252 advantage of the fact that mid and side are orthogonal to encode
1253 the side with just one bit. */
1254 if (N==2)
1255 {
1256 int c;
1257 int sign=0;
1258 celt_norm *x2, *y2;
1259 mbits = b;
1260 sbits = 0;
1261 /* Only need one bit for the side. */
1262 if (itheta != 0 && itheta != 16384)
1263 sbits = 1<<BITRES;
1264 mbits -= sbits;
1265 c = itheta > 8192;
1266 ctx->remaining_bits -= qalloc+sbits;
1267
1268 x2 = c ? Y : X;
1269 y2 = c ? X : Y;
1270 if (sbits)
1271 {
1272 if (encode)
1273 {
1274 /* Here we only need to encode a sign for the side. */
1275 sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
1276 ec_enc_bits(ec, sign, 1);
1277 } else {
1278 sign = ec_dec_bits(ec, 1);
1279 }
1280 }
1281 sign = 1-2*sign;
1282 /* We use orig_fill here because we want to fold the side, but if
1283 itheta==16384, we'll have cleared the low bits of fill. */
1284 cm = quant_band(ctx, x2, N, mbits, B, lowband,
1285 LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
1286 /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1287 and there's no need to worry about mixing with the other channel. */
1288 y2[0] = -sign*x2[1];
1289 y2[1] = sign*x2[0];
1290 if (resynth)
1291 {
1292 celt_norm tmp;
1293 X[0] = MULT16_16_Q15(mid, X[0]);
1294 X[1] = MULT16_16_Q15(mid, X[1]);
1295 Y[0] = MULT16_16_Q15(side, Y[0]);
1296 Y[1] = MULT16_16_Q15(side, Y[1]);
1297 tmp = X[0];
1298 X[0] = SUB16(tmp,Y[0]);
1299 Y[0] = ADD16(tmp,Y[0]);
1300 tmp = X[1];
1301 X[1] = SUB16(tmp,Y[1]);
1302 Y[1] = ADD16(tmp,Y[1]);
1303 }
1304 } else {
1305 /* "Normal" split code */
1306 opus_int32 rebalance;
1307
1308 mbits = IMAX(0, IMIN(b, (b-delta)/2));
1309 sbits = b-mbits;
1310 ctx->remaining_bits -= qalloc;
1311
1312 rebalance = ctx->remaining_bits;
1313 if (mbits >= sbits)
1314 {
1315 /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1316 mid for folding later. */
1317 cm = quant_band(ctx, X, N, mbits, B,
1318 lowband, LM, lowband_out,
1319 Q15ONE, lowband_scratch, fill);
1320 rebalance = mbits - (rebalance-ctx->remaining_bits);
1321 if (rebalance > 3<<BITRES && itheta!=0)
1322 sbits += rebalance - (3<<BITRES);
1323
1324 /* For a stereo split, the high bits of fill are always zero, so no
1325 folding will be done to the side. */
1326 cm |= quant_band(ctx, Y, N, sbits, B,
1327 NULL, LM, NULL,
1328 side, NULL, fill>>B);
1329 } else {
1330 /* For a stereo split, the high bits of fill are always zero, so no
1331 folding will be done to the side. */
1332 cm = quant_band(ctx, Y, N, sbits, B,
1333 NULL, LM, NULL,
1334 side, NULL, fill>>B);
1335 rebalance = sbits - (rebalance-ctx->remaining_bits);
1336 if (rebalance > 3<<BITRES && itheta!=16384)
1337 mbits += rebalance - (3<<BITRES);
1338 /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1339 mid for folding later. */
1340 cm |= quant_band(ctx, X, N, mbits, B,
1341 lowband, LM, lowband_out,
1342 Q15ONE, lowband_scratch, fill);
1343 }
1344 }
1345
1346
1347 /* This code is used by the decoder and by the resynthesis-enabled encoder */
1348 if (resynth)
1349 {
1350 if (N!=2)
1351 stereo_merge(X, Y, mid, N, ctx->arch);
1352 if (inv)
1353 {
1354 int j;
1355 for (j=0;j<N;j++)
1356 Y[j] = -Y[j];
1357 }
1358 }
1359 return cm;
1360 }
1361
1362
quant_all_bands(int encode,const CELTMode * m,int start,int end,celt_norm * X_,celt_norm * Y_,unsigned char * collapse_masks,const celt_ener * bandE,int * pulses,int shortBlocks,int spread,int dual_stereo,int intensity,int * tf_res,opus_int32 total_bits,opus_int32 balance,ec_ctx * ec,int LM,int codedBands,opus_uint32 * seed,int arch)1363 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1364 celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
1365 const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
1366 int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
1367 opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
1368 opus_uint32 *seed, int arch)
1369 {
1370 int i;
1371 opus_int32 remaining_bits;
1372 const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1373 celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
1374 VARDECL(celt_norm, _norm);
1375 celt_norm *lowband_scratch;
1376 int B;
1377 int M;
1378 int lowband_offset;
1379 int update_lowband = 1;
1380 int C = Y_ != NULL ? 2 : 1;
1381 int norm_offset;
1382 #ifdef RESYNTH
1383 int resynth = 1;
1384 #else
1385 int resynth = !encode;
1386 #endif
1387 struct band_ctx ctx;
1388 SAVE_STACK;
1389
1390 M = 1<<LM;
1391 B = shortBlocks ? M : 1;
1392 norm_offset = M*eBands[start];
1393 /* No need to allocate norm for the last band because we don't need an
1394 output in that band. */
1395 ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
1396 norm = _norm;
1397 norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
1398 /* We can use the last band as scratch space because we don't need that
1399 scratch space for the last band. */
1400 lowband_scratch = X_+M*eBands[m->nbEBands-1];
1401
1402 lowband_offset = 0;
1403 ctx.bandE = bandE;
1404 ctx.ec = ec;
1405 ctx.encode = encode;
1406 ctx.intensity = intensity;
1407 ctx.m = m;
1408 ctx.seed = *seed;
1409 ctx.spread = spread;
1410 ctx.arch = arch;
1411 for (i=start;i<end;i++)
1412 {
1413 opus_int32 tell;
1414 int b;
1415 int N;
1416 opus_int32 curr_balance;
1417 int effective_lowband=-1;
1418 celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
1419 int tf_change=0;
1420 unsigned x_cm;
1421 unsigned y_cm;
1422 int last;
1423
1424 ctx.i = i;
1425 last = (i==end-1);
1426
1427 X = X_+M*eBands[i];
1428 if (Y_!=NULL)
1429 Y = Y_+M*eBands[i];
1430 else
1431 Y = NULL;
1432 N = M*eBands[i+1]-M*eBands[i];
1433 tell = ec_tell_frac(ec);
1434
1435 /* Compute how many bits we want to allocate to this band */
1436 if (i != start)
1437 balance -= tell;
1438 remaining_bits = total_bits-tell-1;
1439 ctx.remaining_bits = remaining_bits;
1440 if (i <= codedBands-1)
1441 {
1442 curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
1443 b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
1444 } else {
1445 b = 0;
1446 }
1447
1448 if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
1449 lowband_offset = i;
1450
1451 tf_change = tf_res[i];
1452 ctx.tf_change = tf_change;
1453 if (i>=m->effEBands)
1454 {
1455 X=norm;
1456 if (Y_!=NULL)
1457 Y = norm;
1458 lowband_scratch = NULL;
1459 }
1460 if (i==end-1)
1461 lowband_scratch = NULL;
1462
1463 /* Get a conservative estimate of the collapse_mask's for the bands we're
1464 going to be folding from. */
1465 if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
1466 {
1467 int fold_start;
1468 int fold_end;
1469 int fold_i;
1470 /* This ensures we never repeat spectral content within one band */
1471 effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
1472 fold_start = lowband_offset;
1473 while(M*eBands[--fold_start] > effective_lowband+norm_offset);
1474 fold_end = lowband_offset-1;
1475 while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
1476 x_cm = y_cm = 0;
1477 fold_i = fold_start; do {
1478 x_cm |= collapse_masks[fold_i*C+0];
1479 y_cm |= collapse_masks[fold_i*C+C-1];
1480 } while (++fold_i<fold_end);
1481 }
1482 /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1483 always) be non-zero. */
1484 else
1485 x_cm = y_cm = (1<<B)-1;
1486
1487 if (dual_stereo && i==intensity)
1488 {
1489 int j;
1490
1491 /* Switch off dual stereo to do intensity. */
1492 dual_stereo = 0;
1493 if (resynth)
1494 for (j=0;j<M*eBands[i]-norm_offset;j++)
1495 norm[j] = HALF32(norm[j]+norm2[j]);
1496 }
1497 if (dual_stereo)
1498 {
1499 x_cm = quant_band(&ctx, X, N, b/2, B,
1500 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1501 last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
1502 y_cm = quant_band(&ctx, Y, N, b/2, B,
1503 effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
1504 last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
1505 } else {
1506 if (Y!=NULL)
1507 {
1508 x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1509 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1510 last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
1511 } else {
1512 x_cm = quant_band(&ctx, X, N, b, B,
1513 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1514 last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
1515 }
1516 y_cm = x_cm;
1517 }
1518 collapse_masks[i*C+0] = (unsigned char)x_cm;
1519 collapse_masks[i*C+C-1] = (unsigned char)y_cm;
1520 balance += pulses[i] + tell;
1521
1522 /* Update the folding position only as long as we have 1 bit/sample depth. */
1523 update_lowband = b>(N<<BITRES);
1524 }
1525 *seed = ctx.seed;
1526
1527 RESTORE_STACK;
1528 }
1529
1530