1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Copyright (c) 2008-2009 Gregory Maxwell
4 Written by Jean-Marc Valin and Gregory Maxwell */
5 /*
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33
34 #include <math.h>
35 #include "bands.h"
36 #include "modes.h"
37 #include "vq.h"
38 #include "cwrs.h"
39 #include "stack_alloc.h"
40 #include "os_support.h"
41 #include "mathops.h"
42 #include "rate.h"
43 #include "quant_bands.h"
44 #include "pitch.h"
45
hysteresis_decision(opus_val16 val,const opus_val16 * thresholds,const opus_val16 * hysteresis,int N,int prev)46 int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
47 {
48 int i;
49 for (i=0;i<N;i++)
50 {
51 if (val < thresholds[i])
52 break;
53 }
54 if (i>prev && val < thresholds[prev]+hysteresis[prev])
55 i=prev;
56 if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
57 i=prev;
58 return i;
59 }
60
celt_lcg_rand(opus_uint32 seed)61 opus_uint32 celt_lcg_rand(opus_uint32 seed)
62 {
63 return 1664525 * seed + 1013904223;
64 }
65
66 /* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
67 with this approximation is important because it has an impact on the bit allocation */
bitexact_cos(opus_int16 x)68 opus_int16 bitexact_cos(opus_int16 x)
69 {
70 opus_int32 tmp;
71 opus_int16 x2;
72 tmp = (4096+((opus_int32)(x)*(x)))>>13;
73 celt_sig_assert(tmp<=32767);
74 x2 = tmp;
75 x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
76 celt_sig_assert(x2<=32766);
77 return 1+x2;
78 }
79
bitexact_log2tan(int isin,int icos)80 int bitexact_log2tan(int isin,int icos)
81 {
82 int lc;
83 int ls;
84 lc=EC_ILOG(icos);
85 ls=EC_ILOG(isin);
86 icos<<=15-lc;
87 isin<<=15-ls;
88 return (ls-lc)*(1<<11)
89 +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
90 -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
91 }
92
93 #ifdef FIXED_POINT
94 /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bandE,int end,int C,int LM,int arch)95 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
96 {
97 int i, c, N;
98 const opus_int16 *eBands = m->eBands;
99 (void)arch;
100 N = m->shortMdctSize<<LM;
101 c=0; do {
102 for (i=0;i<end;i++)
103 {
104 int j;
105 opus_val32 maxval=0;
106 opus_val32 sum = 0;
107
108 maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
109 if (maxval > 0)
110 {
111 int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
112 j=eBands[i]<<LM;
113 if (shift>0)
114 {
115 do {
116 sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
117 EXTRACT16(SHR32(X[j+c*N],shift)));
118 } while (++j<eBands[i+1]<<LM);
119 } else {
120 do {
121 sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
122 EXTRACT16(SHL32(X[j+c*N],-shift)));
123 } while (++j<eBands[i+1]<<LM);
124 }
125 /* We're adding one here to ensure the normalized band isn't larger than unity norm */
126 bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
127 } else {
128 bandE[i+c*m->nbEBands] = EPSILON;
129 }
130 /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
131 }
132 } while (++c<C);
133 /*printf ("\n");*/
134 }
135
136 /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * OPUS_RESTRICT freq,celt_norm * OPUS_RESTRICT X,const celt_ener * bandE,int end,int C,int M)137 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
138 {
139 int i, c, N;
140 const opus_int16 *eBands = m->eBands;
141 N = M*m->shortMdctSize;
142 c=0; do {
143 i=0; do {
144 opus_val16 g;
145 int j,shift;
146 opus_val16 E;
147 shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
148 E = VSHR32(bandE[i+c*m->nbEBands], shift);
149 g = EXTRACT16(celt_rcp(SHL32(E,3)));
150 j=M*eBands[i]; do {
151 X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
152 } while (++j<M*eBands[i+1]);
153 } while (++i<end);
154 } while (++c<C);
155 }
156
157 #else /* FIXED_POINT */
158 /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bandE,int end,int C,int LM,int arch)159 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
160 {
161 int i, c, N;
162 const opus_int16 *eBands = m->eBands;
163 N = m->shortMdctSize<<LM;
164 c=0; do {
165 for (i=0;i<end;i++)
166 {
167 opus_val32 sum;
168 sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch);
169 bandE[i+c*m->nbEBands] = celt_sqrt(sum);
170 /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
171 }
172 } while (++c<C);
173 /*printf ("\n");*/
174 }
175
176 /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * OPUS_RESTRICT freq,celt_norm * OPUS_RESTRICT X,const celt_ener * bandE,int end,int C,int M)177 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
178 {
179 int i, c, N;
180 const opus_int16 *eBands = m->eBands;
181 N = M*m->shortMdctSize;
182 c=0; do {
183 for (i=0;i<end;i++)
184 {
185 int j;
186 opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
187 for (j=M*eBands[i];j<M*eBands[i+1];j++)
188 X[j+c*N] = freq[j+c*N]*g;
189 }
190 } while (++c<C);
191 }
192
193 #endif /* FIXED_POINT */
194
195 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
denormalise_bands(const CELTMode * m,const celt_norm * OPUS_RESTRICT X,celt_sig * OPUS_RESTRICT freq,const opus_val16 * bandLogE,int start,int end,int M,int downsample,int silence)196 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
197 celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
198 int end, int M, int downsample, int silence)
199 {
200 int i, N;
201 int bound;
202 celt_sig * OPUS_RESTRICT f;
203 const celt_norm * OPUS_RESTRICT x;
204 const opus_int16 *eBands = m->eBands;
205 N = M*m->shortMdctSize;
206 bound = M*eBands[end];
207 if (downsample!=1)
208 bound = IMIN(bound, N/downsample);
209 if (silence)
210 {
211 bound = 0;
212 start = end = 0;
213 }
214 f = freq;
215 x = X+M*eBands[start];
216 for (i=0;i<M*eBands[start];i++)
217 *f++ = 0;
218 for (i=start;i<end;i++)
219 {
220 int j, band_end;
221 opus_val16 g;
222 opus_val16 lg;
223 #ifdef FIXED_POINT
224 int shift;
225 #endif
226 j=M*eBands[i];
227 band_end = M*eBands[i+1];
228 lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6)));
229 #ifndef FIXED_POINT
230 g = celt_exp2(MIN32(32.f, lg));
231 #else
232 /* Handle the integer part of the log energy */
233 shift = 16-(lg>>DB_SHIFT);
234 if (shift>31)
235 {
236 shift=0;
237 g=0;
238 } else {
239 /* Handle the fractional part. */
240 g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
241 }
242 /* Handle extreme gains with negative shift. */
243 if (shift<0)
244 {
245 /* For shift <= -2 and g > 16384 we'd be likely to overflow, so we're
246 capping the gain here, which is equivalent to a cap of 18 on lg.
247 This shouldn't trigger unless the bitstream is already corrupted. */
248 if (shift <= -2)
249 {
250 g = 16384;
251 shift = -2;
252 }
253 do {
254 *f++ = SHL32(MULT16_16(*x++, g), -shift);
255 } while (++j<band_end);
256 } else
257 #endif
258 /* Be careful of the fixed-point "else" just above when changing this code */
259 do {
260 *f++ = SHR32(MULT16_16(*x++, g), shift);
261 } while (++j<band_end);
262 }
263 celt_assert(start <= end);
264 OPUS_CLEAR(&freq[bound], N-bound);
265 }
266
267 /* This prevents energy collapse for transients with multiple short MDCTs */
anti_collapse(const CELTMode * m,celt_norm * X_,unsigned char * collapse_masks,int LM,int C,int size,int start,int end,const opus_val16 * logE,const opus_val16 * prev1logE,const opus_val16 * prev2logE,const int * pulses,opus_uint32 seed,int arch)268 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
269 int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
270 const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
271 {
272 int c, i, j, k;
273 for (i=start;i<end;i++)
274 {
275 int N0;
276 opus_val16 thresh, sqrt_1;
277 int depth;
278 #ifdef FIXED_POINT
279 int shift;
280 opus_val32 thresh32;
281 #endif
282
283 N0 = m->eBands[i+1]-m->eBands[i];
284 /* depth in 1/8 bits */
285 celt_sig_assert(pulses[i]>=0);
286 depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
287
288 #ifdef FIXED_POINT
289 thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
290 thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
291 {
292 opus_val32 t;
293 t = N0<<LM;
294 shift = celt_ilog2(t)>>1;
295 t = SHL32(t, (7-shift)<<1);
296 sqrt_1 = celt_rsqrt_norm(t);
297 }
298 #else
299 thresh = .5f*celt_exp2(-.125f*depth);
300 sqrt_1 = celt_rsqrt(N0<<LM);
301 #endif
302
303 c=0; do
304 {
305 celt_norm *X;
306 opus_val16 prev1;
307 opus_val16 prev2;
308 opus_val32 Ediff;
309 opus_val16 r;
310 int renormalize=0;
311 prev1 = prev1logE[c*m->nbEBands+i];
312 prev2 = prev2logE[c*m->nbEBands+i];
313 if (C==1)
314 {
315 prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
316 prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
317 }
318 Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
319 Ediff = MAX32(0, Ediff);
320
321 #ifdef FIXED_POINT
322 if (Ediff < 16384)
323 {
324 opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
325 r = 2*MIN16(16383,r32);
326 } else {
327 r = 0;
328 }
329 if (LM==3)
330 r = MULT16_16_Q14(23170, MIN32(23169, r));
331 r = SHR16(MIN16(thresh, r),1);
332 r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
333 #else
334 /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
335 short blocks don't have the same energy as long */
336 r = 2.f*celt_exp2(-Ediff);
337 if (LM==3)
338 r *= 1.41421356f;
339 r = MIN16(thresh, r);
340 r = r*sqrt_1;
341 #endif
342 X = X_+c*size+(m->eBands[i]<<LM);
343 for (k=0;k<1<<LM;k++)
344 {
345 /* Detect collapse */
346 if (!(collapse_masks[i*C+c]&1<<k))
347 {
348 /* Fill with noise */
349 for (j=0;j<N0;j++)
350 {
351 seed = celt_lcg_rand(seed);
352 X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
353 }
354 renormalize = 1;
355 }
356 }
357 /* We just added some energy, so we need to renormalise */
358 if (renormalize)
359 renormalise_vector(X, N0<<LM, Q15ONE, arch);
360 } while (++c<C);
361 }
362 }
363
364 /* Compute the weights to use for optimizing normalized distortion across
365 channels. We use the amplitude to weight square distortion, which means
366 that we use the square root of the value we would have been using if we
367 wanted to minimize the MSE in the non-normalized domain. This roughly
368 corresponds to some quick-and-dirty perceptual experiments I ran to
369 measure inter-aural masking (there doesn't seem to be any published data
370 on the topic). */
compute_channel_weights(celt_ener Ex,celt_ener Ey,opus_val16 w[2])371 static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
372 {
373 celt_ener minE;
374 #if FIXED_POINT
375 int shift;
376 #endif
377 minE = MIN32(Ex, Ey);
378 /* Adjustment to make the weights a bit more conservative. */
379 Ex = ADD32(Ex, minE/3);
380 Ey = ADD32(Ey, minE/3);
381 #if FIXED_POINT
382 shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
383 #endif
384 w[0] = VSHR32(Ex, shift);
385 w[1] = VSHR32(Ey, shift);
386 }
387
intensity_stereo(const CELTMode * m,celt_norm * OPUS_RESTRICT X,const celt_norm * OPUS_RESTRICT Y,const celt_ener * bandE,int bandID,int N)388 static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
389 {
390 int i = bandID;
391 int j;
392 opus_val16 a1, a2;
393 opus_val16 left, right;
394 opus_val16 norm;
395 #ifdef FIXED_POINT
396 int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
397 #endif
398 left = VSHR32(bandE[i],shift);
399 right = VSHR32(bandE[i+m->nbEBands],shift);
400 norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
401 a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
402 a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
403 for (j=0;j<N;j++)
404 {
405 celt_norm r, l;
406 l = X[j];
407 r = Y[j];
408 X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
409 /* Side is not encoded, no need to calculate */
410 }
411 }
412
stereo_split(celt_norm * OPUS_RESTRICT X,celt_norm * OPUS_RESTRICT Y,int N)413 static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
414 {
415 int j;
416 for (j=0;j<N;j++)
417 {
418 opus_val32 r, l;
419 l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
420 r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
421 X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
422 Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
423 }
424 }
425
stereo_merge(celt_norm * OPUS_RESTRICT X,celt_norm * OPUS_RESTRICT Y,opus_val16 mid,int N,int arch)426 static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
427 {
428 int j;
429 opus_val32 xp=0, side=0;
430 opus_val32 El, Er;
431 opus_val16 mid2;
432 #ifdef FIXED_POINT
433 int kl, kr;
434 #endif
435 opus_val32 t, lgain, rgain;
436
437 /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
438 dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
439 /* Compensating for the mid normalization */
440 xp = MULT16_32_Q15(mid, xp);
441 /* mid and side are in Q15, not Q14 like X and Y */
442 mid2 = SHR16(mid, 1);
443 El = MULT16_16(mid2, mid2) + side - 2*xp;
444 Er = MULT16_16(mid2, mid2) + side + 2*xp;
445 if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
446 {
447 OPUS_COPY(Y, X, N);
448 return;
449 }
450
451 #ifdef FIXED_POINT
452 kl = celt_ilog2(El)>>1;
453 kr = celt_ilog2(Er)>>1;
454 #endif
455 t = VSHR32(El, (kl-7)<<1);
456 lgain = celt_rsqrt_norm(t);
457 t = VSHR32(Er, (kr-7)<<1);
458 rgain = celt_rsqrt_norm(t);
459
460 #ifdef FIXED_POINT
461 if (kl < 7)
462 kl = 7;
463 if (kr < 7)
464 kr = 7;
465 #endif
466
467 for (j=0;j<N;j++)
468 {
469 celt_norm r, l;
470 /* Apply mid scaling (side is already scaled) */
471 l = MULT16_16_P15(mid, X[j]);
472 r = Y[j];
473 X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
474 Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
475 }
476 }
477
478 /* Decide whether we should spread the pulses in the current frame */
spreading_decision(const CELTMode * m,const celt_norm * X,int * average,int last_decision,int * hf_average,int * tapset_decision,int update_hf,int end,int C,int M,const int * spread_weight)479 int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
480 int last_decision, int *hf_average, int *tapset_decision, int update_hf,
481 int end, int C, int M, const int *spread_weight)
482 {
483 int i, c, N0;
484 int sum = 0, nbBands=0;
485 const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
486 int decision;
487 int hf_sum=0;
488
489 celt_assert(end>0);
490
491 N0 = M*m->shortMdctSize;
492
493 if (M*(eBands[end]-eBands[end-1]) <= 8)
494 return SPREAD_NONE;
495 c=0; do {
496 for (i=0;i<end;i++)
497 {
498 int j, N, tmp=0;
499 int tcount[3] = {0,0,0};
500 const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
501 N = M*(eBands[i+1]-eBands[i]);
502 if (N<=8)
503 continue;
504 /* Compute rough CDF of |x[j]| */
505 for (j=0;j<N;j++)
506 {
507 opus_val32 x2N; /* Q13 */
508
509 x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
510 if (x2N < QCONST16(0.25f,13))
511 tcount[0]++;
512 if (x2N < QCONST16(0.0625f,13))
513 tcount[1]++;
514 if (x2N < QCONST16(0.015625f,13))
515 tcount[2]++;
516 }
517
518 /* Only include four last bands (8 kHz and up) */
519 if (i>m->nbEBands-4)
520 hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
521 tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
522 sum += tmp*spread_weight[i];
523 nbBands+=spread_weight[i];
524 }
525 } while (++c<C);
526
527 if (update_hf)
528 {
529 if (hf_sum)
530 hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
531 *hf_average = (*hf_average+hf_sum)>>1;
532 hf_sum = *hf_average;
533 if (*tapset_decision==2)
534 hf_sum += 4;
535 else if (*tapset_decision==0)
536 hf_sum -= 4;
537 if (hf_sum > 22)
538 *tapset_decision=2;
539 else if (hf_sum > 18)
540 *tapset_decision=1;
541 else
542 *tapset_decision=0;
543 }
544 /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
545 celt_assert(nbBands>0); /* end has to be non-zero */
546 celt_assert(sum>=0);
547 sum = celt_udiv((opus_int32)sum<<8, nbBands);
548 /* Recursive averaging */
549 sum = (sum+*average)>>1;
550 *average = sum;
551 /* Hysteresis */
552 sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
553 if (sum < 80)
554 {
555 decision = SPREAD_AGGRESSIVE;
556 } else if (sum < 256)
557 {
558 decision = SPREAD_NORMAL;
559 } else if (sum < 384)
560 {
561 decision = SPREAD_LIGHT;
562 } else {
563 decision = SPREAD_NONE;
564 }
565 #ifdef FUZZING
566 decision = rand()&0x3;
567 *tapset_decision=rand()%3;
568 #endif
569 return decision;
570 }
571
572 /* Indexing table for converting from natural Hadamard to ordery Hadamard
573 This is essentially a bit-reversed Gray, on top of which we've added
574 an inversion of the order because we want the DC at the end rather than
575 the beginning. The lines are for N=2, 4, 8, 16 */
576 static const int ordery_table[] = {
577 1, 0,
578 3, 0, 2, 1,
579 7, 0, 4, 3, 6, 1, 5, 2,
580 15, 0, 8, 7, 12, 3, 11, 4, 14, 1, 9, 6, 13, 2, 10, 5,
581 };
582
deinterleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)583 static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
584 {
585 int i,j;
586 VARDECL(celt_norm, tmp);
587 int N;
588 SAVE_STACK;
589 N = N0*stride;
590 ALLOC(tmp, N, celt_norm);
591 celt_assert(stride>0);
592 if (hadamard)
593 {
594 const int *ordery = ordery_table+stride-2;
595 for (i=0;i<stride;i++)
596 {
597 for (j=0;j<N0;j++)
598 tmp[ordery[i]*N0+j] = X[j*stride+i];
599 }
600 } else {
601 for (i=0;i<stride;i++)
602 for (j=0;j<N0;j++)
603 tmp[i*N0+j] = X[j*stride+i];
604 }
605 OPUS_COPY(X, tmp, N);
606 RESTORE_STACK;
607 }
608
interleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)609 static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
610 {
611 int i,j;
612 VARDECL(celt_norm, tmp);
613 int N;
614 SAVE_STACK;
615 N = N0*stride;
616 ALLOC(tmp, N, celt_norm);
617 if (hadamard)
618 {
619 const int *ordery = ordery_table+stride-2;
620 for (i=0;i<stride;i++)
621 for (j=0;j<N0;j++)
622 tmp[j*stride+i] = X[ordery[i]*N0+j];
623 } else {
624 for (i=0;i<stride;i++)
625 for (j=0;j<N0;j++)
626 tmp[j*stride+i] = X[i*N0+j];
627 }
628 OPUS_COPY(X, tmp, N);
629 RESTORE_STACK;
630 }
631
haar1(celt_norm * X,int N0,int stride)632 void haar1(celt_norm *X, int N0, int stride)
633 {
634 int i, j;
635 N0 >>= 1;
636 for (i=0;i<stride;i++)
637 for (j=0;j<N0;j++)
638 {
639 opus_val32 tmp1, tmp2;
640 tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
641 tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
642 X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
643 X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
644 }
645 }
646
compute_qn(int N,int b,int offset,int pulse_cap,int stereo)647 static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
648 {
649 static const opus_int16 exp2_table8[8] =
650 {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
651 int qn, qb;
652 int N2 = 2*N-1;
653 if (stereo && N==2)
654 N2--;
655 /* The upper limit ensures that in a stereo split with itheta==16384, we'll
656 always have enough bits left over to code at least one pulse in the
657 side; otherwise it would collapse, since it doesn't get folded. */
658 qb = celt_sudiv(b+N2*offset, N2);
659 qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
660
661 qb = IMIN(8<<BITRES, qb);
662
663 if (qb<(1<<BITRES>>1)) {
664 qn = 1;
665 } else {
666 qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
667 qn = (qn+1)>>1<<1;
668 }
669 celt_assert(qn <= 256);
670 return qn;
671 }
672
673 struct band_ctx {
674 int encode;
675 int resynth;
676 const CELTMode *m;
677 int i;
678 int intensity;
679 int spread;
680 int tf_change;
681 ec_ctx *ec;
682 opus_int32 remaining_bits;
683 const celt_ener *bandE;
684 opus_uint32 seed;
685 int arch;
686 int theta_round;
687 int disable_inv;
688 int avoid_split_noise;
689 };
690
691 struct split_ctx {
692 int inv;
693 int imid;
694 int iside;
695 int delta;
696 int itheta;
697 int qalloc;
698 };
699
compute_theta(struct band_ctx * ctx,struct split_ctx * sctx,celt_norm * X,celt_norm * Y,int N,int * b,int B,int B0,int LM,int stereo,int * fill)700 static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
701 celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
702 int LM,
703 int stereo, int *fill)
704 {
705 int qn;
706 int itheta=0;
707 int delta;
708 int imid, iside;
709 int qalloc;
710 int pulse_cap;
711 int offset;
712 opus_int32 tell;
713 int inv=0;
714 int encode;
715 const CELTMode *m;
716 int i;
717 int intensity;
718 ec_ctx *ec;
719 const celt_ener *bandE;
720
721 encode = ctx->encode;
722 m = ctx->m;
723 i = ctx->i;
724 intensity = ctx->intensity;
725 ec = ctx->ec;
726 bandE = ctx->bandE;
727
728 /* Decide on the resolution to give to the split parameter theta */
729 pulse_cap = m->logN[i]+LM*(1<<BITRES);
730 offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
731 qn = compute_qn(N, *b, offset, pulse_cap, stereo);
732 if (stereo && i>=intensity)
733 qn = 1;
734 if (encode)
735 {
736 /* theta is the atan() of the ratio between the (normalized)
737 side and mid. With just that parameter, we can re-scale both
738 mid and side because we know that 1) they have unit norm and
739 2) they are orthogonal. */
740 itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
741 }
742 tell = ec_tell_frac(ec);
743 if (qn!=1)
744 {
745 if (encode)
746 {
747 if (!stereo || ctx->theta_round == 0)
748 {
749 itheta = (itheta*(opus_int32)qn+8192)>>14;
750 if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn)
751 {
752 /* Check if the selected value of theta will cause the bit allocation
753 to inject noise on one side. If so, make sure the energy of that side
754 is zero. */
755 int unquantized = celt_udiv((opus_int32)itheta*16384, qn);
756 imid = bitexact_cos((opus_int16)unquantized);
757 iside = bitexact_cos((opus_int16)(16384-unquantized));
758 delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
759 if (delta > *b)
760 itheta = qn;
761 else if (delta < -*b)
762 itheta = 0;
763 }
764 } else {
765 int down;
766 /* Bias quantization towards itheta=0 and itheta=16384. */
767 int bias = itheta > 8192 ? 32767/qn : -32767/qn;
768 down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
769 if (ctx->theta_round < 0)
770 itheta = down;
771 else
772 itheta = down+1;
773 }
774 }
775 /* Entropy coding of the angle. We use a uniform pdf for the
776 time split, a step for stereo, and a triangular one for the rest. */
777 if (stereo && N>2)
778 {
779 int p0 = 3;
780 int x = itheta;
781 int x0 = qn/2;
782 int ft = p0*(x0+1) + x0;
783 /* Use a probability of p0 up to itheta=8192 and then use 1 after */
784 if (encode)
785 {
786 ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
787 } else {
788 int fs;
789 fs=ec_decode(ec,ft);
790 if (fs<(x0+1)*p0)
791 x=fs/p0;
792 else
793 x=x0+1+(fs-(x0+1)*p0);
794 ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
795 itheta = x;
796 }
797 } else if (B0>1 || stereo) {
798 /* Uniform pdf */
799 if (encode)
800 ec_enc_uint(ec, itheta, qn+1);
801 else
802 itheta = ec_dec_uint(ec, qn+1);
803 } else {
804 int fs=1, ft;
805 ft = ((qn>>1)+1)*((qn>>1)+1);
806 if (encode)
807 {
808 int fl;
809
810 fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
811 fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
812 ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
813
814 ec_encode(ec, fl, fl+fs, ft);
815 } else {
816 /* Triangular pdf */
817 int fl=0;
818 int fm;
819 fm = ec_decode(ec, ft);
820
821 if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
822 {
823 itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
824 fs = itheta + 1;
825 fl = itheta*(itheta + 1)>>1;
826 }
827 else
828 {
829 itheta = (2*(qn + 1)
830 - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
831 fs = qn + 1 - itheta;
832 fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
833 }
834
835 ec_dec_update(ec, fl, fl+fs, ft);
836 }
837 }
838 celt_assert(itheta>=0);
839 itheta = celt_udiv((opus_int32)itheta*16384, qn);
840 if (encode && stereo)
841 {
842 if (itheta==0)
843 intensity_stereo(m, X, Y, bandE, i, N);
844 else
845 stereo_split(X, Y, N);
846 }
847 /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
848 Let's do that at higher complexity */
849 } else if (stereo) {
850 if (encode)
851 {
852 inv = itheta > 8192 && !ctx->disable_inv;
853 if (inv)
854 {
855 int j;
856 for (j=0;j<N;j++)
857 Y[j] = -Y[j];
858 }
859 intensity_stereo(m, X, Y, bandE, i, N);
860 }
861 if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
862 {
863 if (encode)
864 ec_enc_bit_logp(ec, inv, 2);
865 else
866 inv = ec_dec_bit_logp(ec, 2);
867 } else
868 inv = 0;
869 /* inv flag override to avoid problems with downmixing. */
870 if (ctx->disable_inv)
871 inv = 0;
872 itheta = 0;
873 }
874 qalloc = ec_tell_frac(ec) - tell;
875 *b -= qalloc;
876
877 if (itheta == 0)
878 {
879 imid = 32767;
880 iside = 0;
881 *fill &= (1<<B)-1;
882 delta = -16384;
883 } else if (itheta == 16384)
884 {
885 imid = 0;
886 iside = 32767;
887 *fill &= ((1<<B)-1)<<B;
888 delta = 16384;
889 } else {
890 imid = bitexact_cos((opus_int16)itheta);
891 iside = bitexact_cos((opus_int16)(16384-itheta));
892 /* This is the mid vs side allocation that minimizes squared error
893 in that band. */
894 delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
895 }
896
897 sctx->inv = inv;
898 sctx->imid = imid;
899 sctx->iside = iside;
900 sctx->delta = delta;
901 sctx->itheta = itheta;
902 sctx->qalloc = qalloc;
903 }
quant_band_n1(struct band_ctx * ctx,celt_norm * X,celt_norm * Y,int b,celt_norm * lowband_out)904 static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
905 celt_norm *lowband_out)
906 {
907 int c;
908 int stereo;
909 celt_norm *x = X;
910 int encode;
911 ec_ctx *ec;
912
913 encode = ctx->encode;
914 ec = ctx->ec;
915
916 stereo = Y != NULL;
917 c=0; do {
918 int sign=0;
919 if (ctx->remaining_bits>=1<<BITRES)
920 {
921 if (encode)
922 {
923 sign = x[0]<0;
924 ec_enc_bits(ec, sign, 1);
925 } else {
926 sign = ec_dec_bits(ec, 1);
927 }
928 ctx->remaining_bits -= 1<<BITRES;
929 b-=1<<BITRES;
930 }
931 if (ctx->resynth)
932 x[0] = sign ? -NORM_SCALING : NORM_SCALING;
933 x = Y;
934 } while (++c<1+stereo);
935 if (lowband_out)
936 lowband_out[0] = SHR16(X[0],4);
937 return 1;
938 }
939
940 /* This function is responsible for encoding and decoding a mono partition.
941 It can split the band in two and transmit the energy difference with
942 the two half-bands. It can be called recursively so bands can end up being
943 split in 8 parts. */
quant_partition(struct band_ctx * ctx,celt_norm * X,int N,int b,int B,celt_norm * lowband,int LM,opus_val16 gain,int fill)944 static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
945 int N, int b, int B, celt_norm *lowband,
946 int LM,
947 opus_val16 gain, int fill)
948 {
949 const unsigned char *cache;
950 int q;
951 int curr_bits;
952 int imid=0, iside=0;
953 int B0=B;
954 opus_val16 mid=0, side=0;
955 unsigned cm=0;
956 celt_norm *Y=NULL;
957 int encode;
958 const CELTMode *m;
959 int i;
960 int spread;
961 ec_ctx *ec;
962
963 encode = ctx->encode;
964 m = ctx->m;
965 i = ctx->i;
966 spread = ctx->spread;
967 ec = ctx->ec;
968
969 /* If we need 1.5 more bit than we can produce, split the band in two. */
970 cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
971 if (LM != -1 && b > cache[cache[0]]+12 && N>2)
972 {
973 int mbits, sbits, delta;
974 int itheta;
975 int qalloc;
976 struct split_ctx sctx;
977 celt_norm *next_lowband2=NULL;
978 opus_int32 rebalance;
979
980 N >>= 1;
981 Y = X+N;
982 LM -= 1;
983 if (B==1)
984 fill = (fill&1)|(fill<<1);
985 B = (B+1)>>1;
986
987 compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill);
988 imid = sctx.imid;
989 iside = sctx.iside;
990 delta = sctx.delta;
991 itheta = sctx.itheta;
992 qalloc = sctx.qalloc;
993 #ifdef FIXED_POINT
994 mid = imid;
995 side = iside;
996 #else
997 mid = (1.f/32768)*imid;
998 side = (1.f/32768)*iside;
999 #endif
1000
1001 /* Give more bits to low-energy MDCTs than they would otherwise deserve */
1002 if (B0>1 && (itheta&0x3fff))
1003 {
1004 if (itheta > 8192)
1005 /* Rough approximation for pre-echo masking */
1006 delta -= delta>>(4-LM);
1007 else
1008 /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
1009 delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
1010 }
1011 mbits = IMAX(0, IMIN(b, (b-delta)/2));
1012 sbits = b-mbits;
1013 ctx->remaining_bits -= qalloc;
1014
1015 if (lowband)
1016 next_lowband2 = lowband+N; /* >32-bit split case */
1017
1018 rebalance = ctx->remaining_bits;
1019 if (mbits >= sbits)
1020 {
1021 cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
1022 MULT16_16_P15(gain,mid), fill);
1023 rebalance = mbits - (rebalance-ctx->remaining_bits);
1024 if (rebalance > 3<<BITRES && itheta!=0)
1025 sbits += rebalance - (3<<BITRES);
1026 cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1027 MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
1028 } else {
1029 cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1030 MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
1031 rebalance = sbits - (rebalance-ctx->remaining_bits);
1032 if (rebalance > 3<<BITRES && itheta!=16384)
1033 mbits += rebalance - (3<<BITRES);
1034 cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
1035 MULT16_16_P15(gain,mid), fill);
1036 }
1037 } else {
1038 /* This is the basic no-split case */
1039 q = bits2pulses(m, i, LM, b);
1040 curr_bits = pulses2bits(m, i, LM, q);
1041 ctx->remaining_bits -= curr_bits;
1042
1043 /* Ensures we can never bust the budget */
1044 while (ctx->remaining_bits < 0 && q > 0)
1045 {
1046 ctx->remaining_bits += curr_bits;
1047 q--;
1048 curr_bits = pulses2bits(m, i, LM, q);
1049 ctx->remaining_bits -= curr_bits;
1050 }
1051
1052 if (q!=0)
1053 {
1054 int K = get_pulses(q);
1055
1056 /* Finally do the actual quantization */
1057 if (encode)
1058 {
1059 cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch);
1060 } else {
1061 cm = alg_unquant(X, N, K, spread, B, ec, gain);
1062 }
1063 } else {
1064 /* If there's no pulse, fill the band anyway */
1065 int j;
1066 if (ctx->resynth)
1067 {
1068 unsigned cm_mask;
1069 /* B can be as large as 16, so this shift might overflow an int on a
1070 16-bit platform; use a long to get defined behavior.*/
1071 cm_mask = (unsigned)(1UL<<B)-1;
1072 fill &= cm_mask;
1073 if (!fill)
1074 {
1075 OPUS_CLEAR(X, N);
1076 } else {
1077 if (lowband == NULL)
1078 {
1079 /* Noise */
1080 for (j=0;j<N;j++)
1081 {
1082 ctx->seed = celt_lcg_rand(ctx->seed);
1083 X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
1084 }
1085 cm = cm_mask;
1086 } else {
1087 /* Folded spectrum */
1088 for (j=0;j<N;j++)
1089 {
1090 opus_val16 tmp;
1091 ctx->seed = celt_lcg_rand(ctx->seed);
1092 /* About 48 dB below the "normal" folding level */
1093 tmp = QCONST16(1.0f/256, 10);
1094 tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
1095 X[j] = lowband[j]+tmp;
1096 }
1097 cm = fill;
1098 }
1099 renormalise_vector(X, N, gain, ctx->arch);
1100 }
1101 }
1102 }
1103 }
1104
1105 return cm;
1106 }
1107
1108
1109 /* This function is responsible for encoding and decoding a band for the mono case. */
quant_band(struct band_ctx * ctx,celt_norm * X,int N,int b,int B,celt_norm * lowband,int LM,celt_norm * lowband_out,opus_val16 gain,celt_norm * lowband_scratch,int fill)1110 static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
1111 int N, int b, int B, celt_norm *lowband,
1112 int LM, celt_norm *lowband_out,
1113 opus_val16 gain, celt_norm *lowband_scratch, int fill)
1114 {
1115 int N0=N;
1116 int N_B=N;
1117 int N_B0;
1118 int B0=B;
1119 int time_divide=0;
1120 int recombine=0;
1121 int longBlocks;
1122 unsigned cm=0;
1123 int k;
1124 int encode;
1125 int tf_change;
1126
1127 encode = ctx->encode;
1128 tf_change = ctx->tf_change;
1129
1130 longBlocks = B0==1;
1131
1132 N_B = celt_udiv(N_B, B);
1133
1134 /* Special case for one sample */
1135 if (N==1)
1136 {
1137 return quant_band_n1(ctx, X, NULL, b, lowband_out);
1138 }
1139
1140 if (tf_change>0)
1141 recombine = tf_change;
1142 /* Band recombining to increase frequency resolution */
1143
1144 if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
1145 {
1146 OPUS_COPY(lowband_scratch, lowband, N);
1147 lowband = lowband_scratch;
1148 }
1149
1150 for (k=0;k<recombine;k++)
1151 {
1152 static const unsigned char bit_interleave_table[16]={
1153 0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
1154 };
1155 if (encode)
1156 haar1(X, N>>k, 1<<k);
1157 if (lowband)
1158 haar1(lowband, N>>k, 1<<k);
1159 fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
1160 }
1161 B>>=recombine;
1162 N_B<<=recombine;
1163
1164 /* Increasing the time resolution */
1165 while ((N_B&1) == 0 && tf_change<0)
1166 {
1167 if (encode)
1168 haar1(X, N_B, B);
1169 if (lowband)
1170 haar1(lowband, N_B, B);
1171 fill |= fill<<B;
1172 B <<= 1;
1173 N_B >>= 1;
1174 time_divide++;
1175 tf_change++;
1176 }
1177 B0=B;
1178 N_B0 = N_B;
1179
1180 /* Reorganize the samples in time order instead of frequency order */
1181 if (B0>1)
1182 {
1183 if (encode)
1184 deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1185 if (lowband)
1186 deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
1187 }
1188
1189 cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill);
1190
1191 /* This code is used by the decoder and by the resynthesis-enabled encoder */
1192 if (ctx->resynth)
1193 {
1194 /* Undo the sample reorganization going from time order to frequency order */
1195 if (B0>1)
1196 interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1197
1198 /* Undo time-freq changes that we did earlier */
1199 N_B = N_B0;
1200 B = B0;
1201 for (k=0;k<time_divide;k++)
1202 {
1203 B >>= 1;
1204 N_B <<= 1;
1205 cm |= cm>>B;
1206 haar1(X, N_B, B);
1207 }
1208
1209 for (k=0;k<recombine;k++)
1210 {
1211 static const unsigned char bit_deinterleave_table[16]={
1212 0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
1213 0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
1214 };
1215 cm = bit_deinterleave_table[cm];
1216 haar1(X, N0>>k, 1<<k);
1217 }
1218 B<<=recombine;
1219
1220 /* Scale output for later folding */
1221 if (lowband_out)
1222 {
1223 int j;
1224 opus_val16 n;
1225 n = celt_sqrt(SHL32(EXTEND32(N0),22));
1226 for (j=0;j<N0;j++)
1227 lowband_out[j] = MULT16_16_Q15(n,X[j]);
1228 }
1229 cm &= (1<<B)-1;
1230 }
1231 return cm;
1232 }
1233
1234
1235 /* This function is responsible for encoding and decoding a band for the stereo case. */
quant_band_stereo(struct band_ctx * ctx,celt_norm * X,celt_norm * Y,int N,int b,int B,celt_norm * lowband,int LM,celt_norm * lowband_out,celt_norm * lowband_scratch,int fill)1236 static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
1237 int N, int b, int B, celt_norm *lowband,
1238 int LM, celt_norm *lowband_out,
1239 celt_norm *lowband_scratch, int fill)
1240 {
1241 int imid=0, iside=0;
1242 int inv = 0;
1243 opus_val16 mid=0, side=0;
1244 unsigned cm=0;
1245 int mbits, sbits, delta;
1246 int itheta;
1247 int qalloc;
1248 struct split_ctx sctx;
1249 int orig_fill;
1250 int encode;
1251 ec_ctx *ec;
1252
1253 encode = ctx->encode;
1254 ec = ctx->ec;
1255
1256 /* Special case for one sample */
1257 if (N==1)
1258 {
1259 return quant_band_n1(ctx, X, Y, b, lowband_out);
1260 }
1261
1262 orig_fill = fill;
1263
1264 compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill);
1265 inv = sctx.inv;
1266 imid = sctx.imid;
1267 iside = sctx.iside;
1268 delta = sctx.delta;
1269 itheta = sctx.itheta;
1270 qalloc = sctx.qalloc;
1271 #ifdef FIXED_POINT
1272 mid = imid;
1273 side = iside;
1274 #else
1275 mid = (1.f/32768)*imid;
1276 side = (1.f/32768)*iside;
1277 #endif
1278
1279 /* This is a special case for N=2 that only works for stereo and takes
1280 advantage of the fact that mid and side are orthogonal to encode
1281 the side with just one bit. */
1282 if (N==2)
1283 {
1284 int c;
1285 int sign=0;
1286 celt_norm *x2, *y2;
1287 mbits = b;
1288 sbits = 0;
1289 /* Only need one bit for the side. */
1290 if (itheta != 0 && itheta != 16384)
1291 sbits = 1<<BITRES;
1292 mbits -= sbits;
1293 c = itheta > 8192;
1294 ctx->remaining_bits -= qalloc+sbits;
1295
1296 x2 = c ? Y : X;
1297 y2 = c ? X : Y;
1298 if (sbits)
1299 {
1300 if (encode)
1301 {
1302 /* Here we only need to encode a sign for the side. */
1303 sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
1304 ec_enc_bits(ec, sign, 1);
1305 } else {
1306 sign = ec_dec_bits(ec, 1);
1307 }
1308 }
1309 sign = 1-2*sign;
1310 /* We use orig_fill here because we want to fold the side, but if
1311 itheta==16384, we'll have cleared the low bits of fill. */
1312 cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1313 lowband_scratch, orig_fill);
1314 /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1315 and there's no need to worry about mixing with the other channel. */
1316 y2[0] = -sign*x2[1];
1317 y2[1] = sign*x2[0];
1318 if (ctx->resynth)
1319 {
1320 celt_norm tmp;
1321 X[0] = MULT16_16_Q15(mid, X[0]);
1322 X[1] = MULT16_16_Q15(mid, X[1]);
1323 Y[0] = MULT16_16_Q15(side, Y[0]);
1324 Y[1] = MULT16_16_Q15(side, Y[1]);
1325 tmp = X[0];
1326 X[0] = SUB16(tmp,Y[0]);
1327 Y[0] = ADD16(tmp,Y[0]);
1328 tmp = X[1];
1329 X[1] = SUB16(tmp,Y[1]);
1330 Y[1] = ADD16(tmp,Y[1]);
1331 }
1332 } else {
1333 /* "Normal" split code */
1334 opus_int32 rebalance;
1335
1336 mbits = IMAX(0, IMIN(b, (b-delta)/2));
1337 sbits = b-mbits;
1338 ctx->remaining_bits -= qalloc;
1339
1340 rebalance = ctx->remaining_bits;
1341 if (mbits >= sbits)
1342 {
1343 /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1344 mid for folding later. */
1345 cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1346 lowband_scratch, fill);
1347 rebalance = mbits - (rebalance-ctx->remaining_bits);
1348 if (rebalance > 3<<BITRES && itheta!=0)
1349 sbits += rebalance - (3<<BITRES);
1350
1351 /* For a stereo split, the high bits of fill are always zero, so no
1352 folding will be done to the side. */
1353 cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
1354 } else {
1355 /* For a stereo split, the high bits of fill are always zero, so no
1356 folding will be done to the side. */
1357 cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
1358 rebalance = sbits - (rebalance-ctx->remaining_bits);
1359 if (rebalance > 3<<BITRES && itheta!=16384)
1360 mbits += rebalance - (3<<BITRES);
1361 /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1362 mid for folding later. */
1363 cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1364 lowband_scratch, fill);
1365 }
1366 }
1367
1368
1369 /* This code is used by the decoder and by the resynthesis-enabled encoder */
1370 if (ctx->resynth)
1371 {
1372 if (N!=2)
1373 stereo_merge(X, Y, mid, N, ctx->arch);
1374 if (inv)
1375 {
1376 int j;
1377 for (j=0;j<N;j++)
1378 Y[j] = -Y[j];
1379 }
1380 }
1381 return cm;
1382 }
1383
special_hybrid_folding(const CELTMode * m,celt_norm * norm,celt_norm * norm2,int start,int M,int dual_stereo)1384 static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
1385 {
1386 int n1, n2;
1387 const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1388 n1 = M*(eBands[start+1]-eBands[start]);
1389 n2 = M*(eBands[start+2]-eBands[start+1]);
1390 /* Duplicate enough of the first band folding data to be able to fold the second band.
1391 Copies no data for CELT-only mode. */
1392 OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
1393 if (dual_stereo)
1394 OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
1395 }
1396
quant_all_bands(int encode,const CELTMode * m,int start,int end,celt_norm * X_,celt_norm * Y_,unsigned char * collapse_masks,const celt_ener * bandE,int * pulses,int shortBlocks,int spread,int dual_stereo,int intensity,int * tf_res,opus_int32 total_bits,opus_int32 balance,ec_ctx * ec,int LM,int codedBands,opus_uint32 * seed,int complexity,int arch,int disable_inv)1397 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1398 celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
1399 const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
1400 int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
1401 opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
1402 opus_uint32 *seed, int complexity, int arch, int disable_inv)
1403 {
1404 int i;
1405 opus_int32 remaining_bits;
1406 const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1407 celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
1408 VARDECL(celt_norm, _norm);
1409 VARDECL(celt_norm, _lowband_scratch);
1410 VARDECL(celt_norm, X_save);
1411 VARDECL(celt_norm, Y_save);
1412 VARDECL(celt_norm, X_save2);
1413 VARDECL(celt_norm, Y_save2);
1414 VARDECL(celt_norm, norm_save2);
1415 int resynth_alloc;
1416 celt_norm *lowband_scratch;
1417 int B;
1418 int M;
1419 int lowband_offset;
1420 int update_lowband = 1;
1421 int C = Y_ != NULL ? 2 : 1;
1422 int norm_offset;
1423 int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
1424 #ifdef RESYNTH
1425 int resynth = 1;
1426 #else
1427 int resynth = !encode || theta_rdo;
1428 #endif
1429 struct band_ctx ctx;
1430 SAVE_STACK;
1431
1432 M = 1<<LM;
1433 B = shortBlocks ? M : 1;
1434 norm_offset = M*eBands[start];
1435 /* No need to allocate norm for the last band because we don't need an
1436 output in that band. */
1437 ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
1438 norm = _norm;
1439 norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
1440
1441 /* For decoding, we can use the last band as scratch space because we don't need that
1442 scratch space for the last band and we don't care about the data there until we're
1443 decoding the last band. */
1444 if (encode && resynth)
1445 resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
1446 else
1447 resynth_alloc = ALLOC_NONE;
1448 ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
1449 if (encode && resynth)
1450 lowband_scratch = _lowband_scratch;
1451 else
1452 lowband_scratch = X_+M*eBands[m->nbEBands-1];
1453 ALLOC(X_save, resynth_alloc, celt_norm);
1454 ALLOC(Y_save, resynth_alloc, celt_norm);
1455 ALLOC(X_save2, resynth_alloc, celt_norm);
1456 ALLOC(Y_save2, resynth_alloc, celt_norm);
1457 ALLOC(norm_save2, resynth_alloc, celt_norm);
1458
1459 lowband_offset = 0;
1460 ctx.bandE = bandE;
1461 ctx.ec = ec;
1462 ctx.encode = encode;
1463 ctx.intensity = intensity;
1464 ctx.m = m;
1465 ctx.seed = *seed;
1466 ctx.spread = spread;
1467 ctx.arch = arch;
1468 ctx.disable_inv = disable_inv;
1469 ctx.resynth = resynth;
1470 ctx.theta_round = 0;
1471 /* Avoid injecting noise in the first band on transients. */
1472 ctx.avoid_split_noise = B > 1;
1473 for (i=start;i<end;i++)
1474 {
1475 opus_int32 tell;
1476 int b;
1477 int N;
1478 opus_int32 curr_balance;
1479 int effective_lowband=-1;
1480 celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
1481 int tf_change=0;
1482 unsigned x_cm;
1483 unsigned y_cm;
1484 int last;
1485
1486 ctx.i = i;
1487 last = (i==end-1);
1488
1489 X = X_+M*eBands[i];
1490 if (Y_!=NULL)
1491 Y = Y_+M*eBands[i];
1492 else
1493 Y = NULL;
1494 N = M*eBands[i+1]-M*eBands[i];
1495 celt_assert(N > 0);
1496 tell = ec_tell_frac(ec);
1497
1498 /* Compute how many bits we want to allocate to this band */
1499 if (i != start)
1500 balance -= tell;
1501 remaining_bits = total_bits-tell-1;
1502 ctx.remaining_bits = remaining_bits;
1503 if (i <= codedBands-1)
1504 {
1505 curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
1506 b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
1507 } else {
1508 b = 0;
1509 }
1510
1511 #ifndef DISABLE_UPDATE_DRAFT
1512 if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
1513 lowband_offset = i;
1514 if (i == start+1)
1515 special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1516 #else
1517 if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
1518 lowband_offset = i;
1519 #endif
1520
1521 tf_change = tf_res[i];
1522 ctx.tf_change = tf_change;
1523 if (i>=m->effEBands)
1524 {
1525 X=norm;
1526 if (Y_!=NULL)
1527 Y = norm;
1528 lowband_scratch = NULL;
1529 }
1530 if (last && !theta_rdo)
1531 lowband_scratch = NULL;
1532
1533 /* Get a conservative estimate of the collapse_mask's for the bands we're
1534 going to be folding from. */
1535 if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
1536 {
1537 int fold_start;
1538 int fold_end;
1539 int fold_i;
1540 /* This ensures we never repeat spectral content within one band */
1541 effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
1542 fold_start = lowband_offset;
1543 while(M*eBands[--fold_start] > effective_lowband+norm_offset);
1544 fold_end = lowband_offset-1;
1545 #ifndef DISABLE_UPDATE_DRAFT
1546 while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
1547 #else
1548 while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
1549 #endif
1550 x_cm = y_cm = 0;
1551 fold_i = fold_start; do {
1552 x_cm |= collapse_masks[fold_i*C+0];
1553 y_cm |= collapse_masks[fold_i*C+C-1];
1554 } while (++fold_i<fold_end);
1555 }
1556 /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1557 always) be non-zero. */
1558 else
1559 x_cm = y_cm = (1<<B)-1;
1560
1561 if (dual_stereo && i==intensity)
1562 {
1563 int j;
1564
1565 /* Switch off dual stereo to do intensity. */
1566 dual_stereo = 0;
1567 if (resynth)
1568 for (j=0;j<M*eBands[i]-norm_offset;j++)
1569 norm[j] = HALF32(norm[j]+norm2[j]);
1570 }
1571 if (dual_stereo)
1572 {
1573 x_cm = quant_band(&ctx, X, N, b/2, B,
1574 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1575 last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
1576 y_cm = quant_band(&ctx, Y, N, b/2, B,
1577 effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
1578 last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
1579 } else {
1580 if (Y!=NULL)
1581 {
1582 if (theta_rdo && i < intensity)
1583 {
1584 ec_ctx ec_save, ec_save2;
1585 struct band_ctx ctx_save, ctx_save2;
1586 opus_val32 dist0, dist1;
1587 unsigned cm, cm2;
1588 int nstart_bytes, nend_bytes, save_bytes;
1589 unsigned char *bytes_buf;
1590 unsigned char bytes_save[1275];
1591 opus_val16 w[2];
1592 compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
1593 /* Make a copy. */
1594 cm = x_cm|y_cm;
1595 ec_save = *ec;
1596 ctx_save = ctx;
1597 OPUS_COPY(X_save, X, N);
1598 OPUS_COPY(Y_save, Y, N);
1599 /* Encode and round down. */
1600 ctx.theta_round = -1;
1601 x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1602 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1603 last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
1604 dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
1605
1606 /* Save first result. */
1607 cm2 = x_cm;
1608 ec_save2 = *ec;
1609 ctx_save2 = ctx;
1610 OPUS_COPY(X_save2, X, N);
1611 OPUS_COPY(Y_save2, Y, N);
1612 if (!last)
1613 OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
1614 nstart_bytes = ec_save.offs;
1615 nend_bytes = ec_save.storage;
1616 bytes_buf = ec_save.buf+nstart_bytes;
1617 save_bytes = nend_bytes-nstart_bytes;
1618 OPUS_COPY(bytes_save, bytes_buf, save_bytes);
1619
1620 /* Restore */
1621 *ec = ec_save;
1622 ctx = ctx_save;
1623 OPUS_COPY(X, X_save, N);
1624 OPUS_COPY(Y, Y_save, N);
1625 #ifndef DISABLE_UPDATE_DRAFT
1626 if (i == start+1)
1627 special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1628 #endif
1629 /* Encode and round up. */
1630 ctx.theta_round = 1;
1631 x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1632 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1633 last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
1634 dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
1635 if (dist0 >= dist1) {
1636 x_cm = cm2;
1637 *ec = ec_save2;
1638 ctx = ctx_save2;
1639 OPUS_COPY(X, X_save2, N);
1640 OPUS_COPY(Y, Y_save2, N);
1641 if (!last)
1642 OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
1643 OPUS_COPY(bytes_buf, bytes_save, save_bytes);
1644 }
1645 } else {
1646 ctx.theta_round = 0;
1647 x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1648 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1649 last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
1650 }
1651 } else {
1652 x_cm = quant_band(&ctx, X, N, b, B,
1653 effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1654 last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
1655 }
1656 y_cm = x_cm;
1657 }
1658 collapse_masks[i*C+0] = (unsigned char)x_cm;
1659 collapse_masks[i*C+C-1] = (unsigned char)y_cm;
1660 balance += pulses[i] + tell;
1661
1662 /* Update the folding position only as long as we have 1 bit/sample depth. */
1663 update_lowband = b>(N<<BITRES);
1664 /* We only need to avoid noise on a split for the first band. After that, we
1665 have folding. */
1666 ctx.avoid_split_noise = 0;
1667 }
1668 *seed = ctx.seed;
1669
1670 RESTORE_STACK;
1671 }
1672
1673