• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright (c) 2007-2008 CSIRO
2    Copyright (c) 2007-2009 Xiph.Org Foundation
3    Copyright (c) 2008-2009 Gregory Maxwell
4    Written by Jean-Marc Valin and Gregory Maxwell */
5 /*
6    Redistribution and use in source and binary forms, with or without
7    modification, are permitted provided that the following conditions
8    are met:
9 
10    - Redistributions of source code must retain the above copyright
11    notice, this list of conditions and the following disclaimer.
12 
13    - Redistributions in binary form must reproduce the above copyright
14    notice, this list of conditions and the following disclaimer in the
15    documentation and/or other materials provided with the distribution.
16 
17    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 #ifdef HAVE_CONFIG_H
31 #include "config.h"
32 #endif
33 
34 #include <math.h>
35 #include "bands.h"
36 #include "modes.h"
37 #include "vq.h"
38 #include "cwrs.h"
39 #include "stack_alloc.h"
40 #include "os_support.h"
41 #include "mathops.h"
42 #include "rate.h"
43 #include "quant_bands.h"
44 #include "pitch.h"
45 
hysteresis_decision(opus_val16 val,const opus_val16 * thresholds,const opus_val16 * hysteresis,int N,int prev)46 int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
47 {
48    int i;
49    for (i=0;i<N;i++)
50    {
51       if (val < thresholds[i])
52          break;
53    }
54    if (i>prev && val < thresholds[prev]+hysteresis[prev])
55       i=prev;
56    if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
57       i=prev;
58    return i;
59 }
60 
celt_lcg_rand(opus_uint32 seed)61 opus_uint32 celt_lcg_rand(opus_uint32 seed)
62 {
63    return 1664525 * seed + 1013904223;
64 }
65 
66 /* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
67    with this approximation is important because it has an impact on the bit allocation */
bitexact_cos(opus_int16 x)68 opus_int16 bitexact_cos(opus_int16 x)
69 {
70    opus_int32 tmp;
71    opus_int16 x2;
72    tmp = (4096+((opus_int32)(x)*(x)))>>13;
73    celt_sig_assert(tmp<=32767);
74    x2 = tmp;
75    x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
76    celt_sig_assert(x2<=32766);
77    return 1+x2;
78 }
79 
bitexact_log2tan(int isin,int icos)80 int bitexact_log2tan(int isin,int icos)
81 {
82    int lc;
83    int ls;
84    lc=EC_ILOG(icos);
85    ls=EC_ILOG(isin);
86    icos<<=15-lc;
87    isin<<=15-ls;
88    return (ls-lc)*(1<<11)
89          +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
90          -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
91 }
92 
93 #ifdef FIXED_POINT
94 /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bandE,int end,int C,int LM,int arch)95 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
96 {
97    int i, c, N;
98    const opus_int16 *eBands = m->eBands;
99    (void)arch;
100    N = m->shortMdctSize<<LM;
101    c=0; do {
102       for (i=0;i<end;i++)
103       {
104          int j;
105          opus_val32 maxval=0;
106          opus_val32 sum = 0;
107 
108          maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
109          if (maxval > 0)
110          {
111             int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
112             j=eBands[i]<<LM;
113             if (shift>0)
114             {
115                do {
116                   sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
117                         EXTRACT16(SHR32(X[j+c*N],shift)));
118                } while (++j<eBands[i+1]<<LM);
119             } else {
120                do {
121                   sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
122                         EXTRACT16(SHL32(X[j+c*N],-shift)));
123                } while (++j<eBands[i+1]<<LM);
124             }
125             /* We're adding one here to ensure the normalized band isn't larger than unity norm */
126             bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
127          } else {
128             bandE[i+c*m->nbEBands] = EPSILON;
129          }
130          /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
131       }
132    } while (++c<C);
133    /*printf ("\n");*/
134 }
135 
136 /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * OPUS_RESTRICT freq,celt_norm * OPUS_RESTRICT X,const celt_ener * bandE,int end,int C,int M)137 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
138 {
139    int i, c, N;
140    const opus_int16 *eBands = m->eBands;
141    N = M*m->shortMdctSize;
142    c=0; do {
143       i=0; do {
144          opus_val16 g;
145          int j,shift;
146          opus_val16 E;
147          shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
148          E = VSHR32(bandE[i+c*m->nbEBands], shift);
149          g = EXTRACT16(celt_rcp(SHL32(E,3)));
150          j=M*eBands[i]; do {
151             X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
152          } while (++j<M*eBands[i+1]);
153       } while (++i<end);
154    } while (++c<C);
155 }
156 
157 #else /* FIXED_POINT */
158 /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bandE,int end,int C,int LM,int arch)159 void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
160 {
161    int i, c, N;
162    const opus_int16 *eBands = m->eBands;
163    N = m->shortMdctSize<<LM;
164    c=0; do {
165       for (i=0;i<end;i++)
166       {
167          opus_val32 sum;
168          sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch);
169          bandE[i+c*m->nbEBands] = celt_sqrt(sum);
170          /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
171       }
172    } while (++c<C);
173    /*printf ("\n");*/
174 }
175 
176 /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * OPUS_RESTRICT freq,celt_norm * OPUS_RESTRICT X,const celt_ener * bandE,int end,int C,int M)177 void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
178 {
179    int i, c, N;
180    const opus_int16 *eBands = m->eBands;
181    N = M*m->shortMdctSize;
182    c=0; do {
183       for (i=0;i<end;i++)
184       {
185          int j;
186          opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
187          for (j=M*eBands[i];j<M*eBands[i+1];j++)
188             X[j+c*N] = freq[j+c*N]*g;
189       }
190    } while (++c<C);
191 }
192 
193 #endif /* FIXED_POINT */
194 
195 /* De-normalise the energy to produce the synthesis from the unit-energy bands */
denormalise_bands(const CELTMode * m,const celt_norm * OPUS_RESTRICT X,celt_sig * OPUS_RESTRICT freq,const opus_val16 * bandLogE,int start,int end,int M,int downsample,int silence)196 void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
197       celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
198       int end, int M, int downsample, int silence)
199 {
200    int i, N;
201    int bound;
202    celt_sig * OPUS_RESTRICT f;
203    const celt_norm * OPUS_RESTRICT x;
204    const opus_int16 *eBands = m->eBands;
205    N = M*m->shortMdctSize;
206    bound = M*eBands[end];
207    if (downsample!=1)
208       bound = IMIN(bound, N/downsample);
209    if (silence)
210    {
211       bound = 0;
212       start = end = 0;
213    }
214    f = freq;
215    x = X+M*eBands[start];
216    for (i=0;i<M*eBands[start];i++)
217       *f++ = 0;
218    for (i=start;i<end;i++)
219    {
220       int j, band_end;
221       opus_val16 g;
222       opus_val16 lg;
223 #ifdef FIXED_POINT
224       int shift;
225 #endif
226       j=M*eBands[i];
227       band_end = M*eBands[i+1];
228       lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6)));
229 #ifndef FIXED_POINT
230       g = celt_exp2(MIN32(32.f, lg));
231 #else
232       /* Handle the integer part of the log energy */
233       shift = 16-(lg>>DB_SHIFT);
234       if (shift>31)
235       {
236          shift=0;
237          g=0;
238       } else {
239          /* Handle the fractional part. */
240          g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
241       }
242       /* Handle extreme gains with negative shift. */
243       if (shift<0)
244       {
245          /* For shift <= -2 and g > 16384 we'd be likely to overflow, so we're
246             capping the gain here, which is equivalent to a cap of 18 on lg.
247             This shouldn't trigger unless the bitstream is already corrupted. */
248          if (shift <= -2)
249          {
250             g = 16384;
251             shift = -2;
252          }
253          do {
254             *f++ = SHL32(MULT16_16(*x++, g), -shift);
255          } while (++j<band_end);
256       } else
257 #endif
258          /* Be careful of the fixed-point "else" just above when changing this code */
259          do {
260             *f++ = SHR32(MULT16_16(*x++, g), shift);
261          } while (++j<band_end);
262    }
263    celt_assert(start <= end);
264    OPUS_CLEAR(&freq[bound], N-bound);
265 }
266 
267 /* This prevents energy collapse for transients with multiple short MDCTs */
anti_collapse(const CELTMode * m,celt_norm * X_,unsigned char * collapse_masks,int LM,int C,int size,int start,int end,const opus_val16 * logE,const opus_val16 * prev1logE,const opus_val16 * prev2logE,const int * pulses,opus_uint32 seed,int arch)268 void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
269       int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
270       const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
271 {
272    int c, i, j, k;
273    for (i=start;i<end;i++)
274    {
275       int N0;
276       opus_val16 thresh, sqrt_1;
277       int depth;
278 #ifdef FIXED_POINT
279       int shift;
280       opus_val32 thresh32;
281 #endif
282 
283       N0 = m->eBands[i+1]-m->eBands[i];
284       /* depth in 1/8 bits */
285       celt_sig_assert(pulses[i]>=0);
286       depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
287 
288 #ifdef FIXED_POINT
289       thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
290       thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
291       {
292          opus_val32 t;
293          t = N0<<LM;
294          shift = celt_ilog2(t)>>1;
295          t = SHL32(t, (7-shift)<<1);
296          sqrt_1 = celt_rsqrt_norm(t);
297       }
298 #else
299       thresh = .5f*celt_exp2(-.125f*depth);
300       sqrt_1 = celt_rsqrt(N0<<LM);
301 #endif
302 
303       c=0; do
304       {
305          celt_norm *X;
306          opus_val16 prev1;
307          opus_val16 prev2;
308          opus_val32 Ediff;
309          opus_val16 r;
310          int renormalize=0;
311          prev1 = prev1logE[c*m->nbEBands+i];
312          prev2 = prev2logE[c*m->nbEBands+i];
313          if (C==1)
314          {
315             prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
316             prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
317          }
318          Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
319          Ediff = MAX32(0, Ediff);
320 
321 #ifdef FIXED_POINT
322          if (Ediff < 16384)
323          {
324             opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
325             r = 2*MIN16(16383,r32);
326          } else {
327             r = 0;
328          }
329          if (LM==3)
330             r = MULT16_16_Q14(23170, MIN32(23169, r));
331          r = SHR16(MIN16(thresh, r),1);
332          r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
333 #else
334          /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
335             short blocks don't have the same energy as long */
336          r = 2.f*celt_exp2(-Ediff);
337          if (LM==3)
338             r *= 1.41421356f;
339          r = MIN16(thresh, r);
340          r = r*sqrt_1;
341 #endif
342          X = X_+c*size+(m->eBands[i]<<LM);
343          for (k=0;k<1<<LM;k++)
344          {
345             /* Detect collapse */
346             if (!(collapse_masks[i*C+c]&1<<k))
347             {
348                /* Fill with noise */
349                for (j=0;j<N0;j++)
350                {
351                   seed = celt_lcg_rand(seed);
352                   X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
353                }
354                renormalize = 1;
355             }
356          }
357          /* We just added some energy, so we need to renormalise */
358          if (renormalize)
359             renormalise_vector(X, N0<<LM, Q15ONE, arch);
360       } while (++c<C);
361    }
362 }
363 
364 /* Compute the weights to use for optimizing normalized distortion across
365    channels. We use the amplitude to weight square distortion, which means
366    that we use the square root of the value we would have been using if we
367    wanted to minimize the MSE in the non-normalized domain. This roughly
368    corresponds to some quick-and-dirty perceptual experiments I ran to
369    measure inter-aural masking (there doesn't seem to be any published data
370    on the topic). */
compute_channel_weights(celt_ener Ex,celt_ener Ey,opus_val16 w[2])371 static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
372 {
373    celt_ener minE;
374 #ifdef FIXED_POINT
375    int shift;
376 #endif
377    minE = MIN32(Ex, Ey);
378    /* Adjustment to make the weights a bit more conservative. */
379    Ex = ADD32(Ex, minE/3);
380    Ey = ADD32(Ey, minE/3);
381 #ifdef FIXED_POINT
382    shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
383 #endif
384    w[0] = VSHR32(Ex, shift);
385    w[1] = VSHR32(Ey, shift);
386 }
387 
intensity_stereo(const CELTMode * m,celt_norm * OPUS_RESTRICT X,const celt_norm * OPUS_RESTRICT Y,const celt_ener * bandE,int bandID,int N)388 static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
389 {
390    int i = bandID;
391    int j;
392    opus_val16 a1, a2;
393    opus_val16 left, right;
394    opus_val16 norm;
395 #ifdef FIXED_POINT
396    int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
397 #endif
398    left = VSHR32(bandE[i],shift);
399    right = VSHR32(bandE[i+m->nbEBands],shift);
400    norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
401    a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
402    a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
403    for (j=0;j<N;j++)
404    {
405       celt_norm r, l;
406       l = X[j];
407       r = Y[j];
408       X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
409       /* Side is not encoded, no need to calculate */
410    }
411 }
412 
stereo_split(celt_norm * OPUS_RESTRICT X,celt_norm * OPUS_RESTRICT Y,int N)413 static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
414 {
415    int j;
416    for (j=0;j<N;j++)
417    {
418       opus_val32 r, l;
419       l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
420       r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
421       X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
422       Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
423    }
424 }
425 
stereo_merge(celt_norm * OPUS_RESTRICT X,celt_norm * OPUS_RESTRICT Y,opus_val16 mid,int N,int arch)426 static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
427 {
428    int j;
429    opus_val32 xp=0, side=0;
430    opus_val32 El, Er;
431    opus_val16 mid2;
432 #ifdef FIXED_POINT
433    int kl, kr;
434 #endif
435    opus_val32 t, lgain, rgain;
436 
437    /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
438    dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
439    /* Compensating for the mid normalization */
440    xp = MULT16_32_Q15(mid, xp);
441    /* mid and side are in Q15, not Q14 like X and Y */
442    mid2 = SHR16(mid, 1);
443    El = MULT16_16(mid2, mid2) + side - 2*xp;
444    Er = MULT16_16(mid2, mid2) + side + 2*xp;
445    if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
446    {
447       OPUS_COPY(Y, X, N);
448       return;
449    }
450 
451 #ifdef FIXED_POINT
452    kl = celt_ilog2(El)>>1;
453    kr = celt_ilog2(Er)>>1;
454 #endif
455    t = VSHR32(El, (kl-7)<<1);
456    lgain = celt_rsqrt_norm(t);
457    t = VSHR32(Er, (kr-7)<<1);
458    rgain = celt_rsqrt_norm(t);
459 
460 #ifdef FIXED_POINT
461    if (kl < 7)
462       kl = 7;
463    if (kr < 7)
464       kr = 7;
465 #endif
466 
467    for (j=0;j<N;j++)
468    {
469       celt_norm r, l;
470       /* Apply mid scaling (side is already scaled) */
471       l = MULT16_16_P15(mid, X[j]);
472       r = Y[j];
473       X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
474       Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
475    }
476 }
477 
478 /* Decide whether we should spread the pulses in the current frame */
spreading_decision(const CELTMode * m,const celt_norm * X,int * average,int last_decision,int * hf_average,int * tapset_decision,int update_hf,int end,int C,int M,const int * spread_weight)479 int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
480       int last_decision, int *hf_average, int *tapset_decision, int update_hf,
481       int end, int C, int M, const int *spread_weight)
482 {
483    int i, c, N0;
484    int sum = 0, nbBands=0;
485    const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
486    int decision;
487    int hf_sum=0;
488 
489    celt_assert(end>0);
490 
491    N0 = M*m->shortMdctSize;
492 
493    if (M*(eBands[end]-eBands[end-1]) <= 8)
494       return SPREAD_NONE;
495    c=0; do {
496       for (i=0;i<end;i++)
497       {
498          int j, N, tmp=0;
499          int tcount[3] = {0,0,0};
500          const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
501          N = M*(eBands[i+1]-eBands[i]);
502          if (N<=8)
503             continue;
504          /* Compute rough CDF of |x[j]| */
505          for (j=0;j<N;j++)
506          {
507             opus_val32 x2N; /* Q13 */
508 
509             x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
510             if (x2N < QCONST16(0.25f,13))
511                tcount[0]++;
512             if (x2N < QCONST16(0.0625f,13))
513                tcount[1]++;
514             if (x2N < QCONST16(0.015625f,13))
515                tcount[2]++;
516          }
517 
518          /* Only include four last bands (8 kHz and up) */
519          if (i>m->nbEBands-4)
520             hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
521          tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
522          sum += tmp*spread_weight[i];
523          nbBands+=spread_weight[i];
524       }
525    } while (++c<C);
526 
527    if (update_hf)
528    {
529       if (hf_sum)
530          hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
531       *hf_average = (*hf_average+hf_sum)>>1;
532       hf_sum = *hf_average;
533       if (*tapset_decision==2)
534          hf_sum += 4;
535       else if (*tapset_decision==0)
536          hf_sum -= 4;
537       if (hf_sum > 22)
538          *tapset_decision=2;
539       else if (hf_sum > 18)
540          *tapset_decision=1;
541       else
542          *tapset_decision=0;
543    }
544    /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
545    celt_assert(nbBands>0); /* end has to be non-zero */
546    celt_assert(sum>=0);
547    sum = celt_udiv((opus_int32)sum<<8, nbBands);
548    /* Recursive averaging */
549    sum = (sum+*average)>>1;
550    *average = sum;
551    /* Hysteresis */
552    sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
553    if (sum < 80)
554    {
555       decision = SPREAD_AGGRESSIVE;
556    } else if (sum < 256)
557    {
558       decision = SPREAD_NORMAL;
559    } else if (sum < 384)
560    {
561       decision = SPREAD_LIGHT;
562    } else {
563       decision = SPREAD_NONE;
564    }
565 #ifdef FUZZING
566    decision = rand()&0x3;
567    *tapset_decision=rand()%3;
568 #endif
569    return decision;
570 }
571 
572 /* Indexing table for converting from natural Hadamard to ordery Hadamard
573    This is essentially a bit-reversed Gray, on top of which we've added
574    an inversion of the order because we want the DC at the end rather than
575    the beginning. The lines are for N=2, 4, 8, 16 */
576 static const int ordery_table[] = {
577        1,  0,
578        3,  0,  2,  1,
579        7,  0,  4,  3,  6,  1,  5,  2,
580       15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
581 };
582 
deinterleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)583 static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
584 {
585    int i,j;
586    VARDECL(celt_norm, tmp);
587    int N;
588    SAVE_STACK;
589    N = N0*stride;
590    ALLOC(tmp, N, celt_norm);
591    celt_assert(stride>0);
592    if (hadamard)
593    {
594       const int *ordery = ordery_table+stride-2;
595       for (i=0;i<stride;i++)
596       {
597          for (j=0;j<N0;j++)
598             tmp[ordery[i]*N0+j] = X[j*stride+i];
599       }
600    } else {
601       for (i=0;i<stride;i++)
602          for (j=0;j<N0;j++)
603             tmp[i*N0+j] = X[j*stride+i];
604    }
605    OPUS_COPY(X, tmp, N);
606    RESTORE_STACK;
607 }
608 
interleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)609 static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
610 {
611    int i,j;
612    VARDECL(celt_norm, tmp);
613    int N;
614    SAVE_STACK;
615    N = N0*stride;
616    ALLOC(tmp, N, celt_norm);
617    if (hadamard)
618    {
619       const int *ordery = ordery_table+stride-2;
620       for (i=0;i<stride;i++)
621          for (j=0;j<N0;j++)
622             tmp[j*stride+i] = X[ordery[i]*N0+j];
623    } else {
624       for (i=0;i<stride;i++)
625          for (j=0;j<N0;j++)
626             tmp[j*stride+i] = X[i*N0+j];
627    }
628    OPUS_COPY(X, tmp, N);
629    RESTORE_STACK;
630 }
631 
haar1(celt_norm * X,int N0,int stride)632 void haar1(celt_norm *X, int N0, int stride)
633 {
634    int i, j;
635    N0 >>= 1;
636    for (i=0;i<stride;i++)
637       for (j=0;j<N0;j++)
638       {
639          opus_val32 tmp1, tmp2;
640          tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
641          tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
642          X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
643          X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
644       }
645 }
646 
compute_qn(int N,int b,int offset,int pulse_cap,int stereo)647 static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
648 {
649    static const opus_int16 exp2_table8[8] =
650       {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
651    int qn, qb;
652    int N2 = 2*N-1;
653    if (stereo && N==2)
654       N2--;
655    /* The upper limit ensures that in a stereo split with itheta==16384, we'll
656        always have enough bits left over to code at least one pulse in the
657        side; otherwise it would collapse, since it doesn't get folded. */
658    qb = celt_sudiv(b+N2*offset, N2);
659    qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
660 
661    qb = IMIN(8<<BITRES, qb);
662 
663    if (qb<(1<<BITRES>>1)) {
664       qn = 1;
665    } else {
666       qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
667       qn = (qn+1)>>1<<1;
668    }
669    celt_assert(qn <= 256);
670    return qn;
671 }
672 
673 struct band_ctx {
674    int encode;
675    int resynth;
676    const CELTMode *m;
677    int i;
678    int intensity;
679    int spread;
680    int tf_change;
681    ec_ctx *ec;
682    opus_int32 remaining_bits;
683    const celt_ener *bandE;
684    opus_uint32 seed;
685    int arch;
686    int theta_round;
687    int disable_inv;
688    int avoid_split_noise;
689 };
690 
691 struct split_ctx {
692    int inv;
693    int imid;
694    int iside;
695    int delta;
696    int itheta;
697    int qalloc;
698 };
699 
compute_theta(struct band_ctx * ctx,struct split_ctx * sctx,celt_norm * X,celt_norm * Y,int N,int * b,int B,int B0,int LM,int stereo,int * fill)700 static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
701       celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
702       int LM,
703       int stereo, int *fill)
704 {
705    int qn;
706    int itheta=0;
707    int delta;
708    int imid, iside;
709    int qalloc;
710    int pulse_cap;
711    int offset;
712    opus_int32 tell;
713    int inv=0;
714    int encode;
715    const CELTMode *m;
716    int i;
717    int intensity;
718    ec_ctx *ec;
719    const celt_ener *bandE;
720 
721    encode = ctx->encode;
722    m = ctx->m;
723    i = ctx->i;
724    intensity = ctx->intensity;
725    ec = ctx->ec;
726    bandE = ctx->bandE;
727 
728    /* Decide on the resolution to give to the split parameter theta */
729    pulse_cap = m->logN[i]+LM*(1<<BITRES);
730    offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
731    qn = compute_qn(N, *b, offset, pulse_cap, stereo);
732    if (stereo && i>=intensity)
733       qn = 1;
734    if (encode)
735    {
736       /* theta is the atan() of the ratio between the (normalized)
737          side and mid. With just that parameter, we can re-scale both
738          mid and side because we know that 1) they have unit norm and
739          2) they are orthogonal. */
740       itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
741    }
742    tell = ec_tell_frac(ec);
743    if (qn!=1)
744    {
745       if (encode)
746       {
747          if (!stereo || ctx->theta_round == 0)
748          {
749             itheta = (itheta*(opus_int32)qn+8192)>>14;
750             if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn)
751             {
752                /* Check if the selected value of theta will cause the bit allocation
753                   to inject noise on one side. If so, make sure the energy of that side
754                   is zero. */
755                int unquantized = celt_udiv((opus_int32)itheta*16384, qn);
756                imid = bitexact_cos((opus_int16)unquantized);
757                iside = bitexact_cos((opus_int16)(16384-unquantized));
758                delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
759                if (delta > *b)
760                   itheta = qn;
761                else if (delta < -*b)
762                   itheta = 0;
763             }
764          } else {
765             int down;
766             /* Bias quantization towards itheta=0 and itheta=16384. */
767             int bias = itheta > 8192 ? 32767/qn : -32767/qn;
768             down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
769             if (ctx->theta_round < 0)
770                itheta = down;
771             else
772                itheta = down+1;
773          }
774       }
775       /* Entropy coding of the angle. We use a uniform pdf for the
776          time split, a step for stereo, and a triangular one for the rest. */
777       if (stereo && N>2)
778       {
779          int p0 = 3;
780          int x = itheta;
781          int x0 = qn/2;
782          int ft = p0*(x0+1) + x0;
783          /* Use a probability of p0 up to itheta=8192 and then use 1 after */
784          if (encode)
785          {
786             ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
787          } else {
788             int fs;
789             fs=ec_decode(ec,ft);
790             if (fs<(x0+1)*p0)
791                x=fs/p0;
792             else
793                x=x0+1+(fs-(x0+1)*p0);
794             ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
795             itheta = x;
796          }
797       } else if (B0>1 || stereo) {
798          /* Uniform pdf */
799          if (encode)
800             ec_enc_uint(ec, itheta, qn+1);
801          else
802             itheta = ec_dec_uint(ec, qn+1);
803       } else {
804          int fs=1, ft;
805          ft = ((qn>>1)+1)*((qn>>1)+1);
806          if (encode)
807          {
808             int fl;
809 
810             fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
811             fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
812              ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
813 
814             ec_encode(ec, fl, fl+fs, ft);
815          } else {
816             /* Triangular pdf */
817             int fl=0;
818             int fm;
819             fm = ec_decode(ec, ft);
820 
821             if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
822             {
823                itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
824                fs = itheta + 1;
825                fl = itheta*(itheta + 1)>>1;
826             }
827             else
828             {
829                itheta = (2*(qn + 1)
830                 - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
831                fs = qn + 1 - itheta;
832                fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
833             }
834 
835             ec_dec_update(ec, fl, fl+fs, ft);
836          }
837       }
838       celt_assert(itheta>=0);
839       itheta = celt_udiv((opus_int32)itheta*16384, qn);
840       if (encode && stereo)
841       {
842          if (itheta==0)
843             intensity_stereo(m, X, Y, bandE, i, N);
844          else
845             stereo_split(X, Y, N);
846       }
847       /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
848                Let's do that at higher complexity */
849    } else if (stereo) {
850       if (encode)
851       {
852          inv = itheta > 8192 && !ctx->disable_inv;
853          if (inv)
854          {
855             int j;
856             for (j=0;j<N;j++)
857                Y[j] = -Y[j];
858          }
859          intensity_stereo(m, X, Y, bandE, i, N);
860       }
861       if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
862       {
863          if (encode)
864             ec_enc_bit_logp(ec, inv, 2);
865          else
866             inv = ec_dec_bit_logp(ec, 2);
867       } else
868          inv = 0;
869       /* inv flag override to avoid problems with downmixing. */
870       if (ctx->disable_inv)
871          inv = 0;
872       itheta = 0;
873    }
874    qalloc = ec_tell_frac(ec) - tell;
875    *b -= qalloc;
876 
877    if (itheta == 0)
878    {
879       imid = 32767;
880       iside = 0;
881       *fill &= (1<<B)-1;
882       delta = -16384;
883    } else if (itheta == 16384)
884    {
885       imid = 0;
886       iside = 32767;
887       *fill &= ((1<<B)-1)<<B;
888       delta = 16384;
889    } else {
890       imid = bitexact_cos((opus_int16)itheta);
891       iside = bitexact_cos((opus_int16)(16384-itheta));
892       /* This is the mid vs side allocation that minimizes squared error
893          in that band. */
894       delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
895    }
896 
897    sctx->inv = inv;
898    sctx->imid = imid;
899    sctx->iside = iside;
900    sctx->delta = delta;
901    sctx->itheta = itheta;
902    sctx->qalloc = qalloc;
903 }
quant_band_n1(struct band_ctx * ctx,celt_norm * X,celt_norm * Y,celt_norm * lowband_out)904 static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
905       celt_norm *lowband_out)
906 {
907    int c;
908    int stereo;
909    celt_norm *x = X;
910    int encode;
911    ec_ctx *ec;
912 
913    encode = ctx->encode;
914    ec = ctx->ec;
915 
916    stereo = Y != NULL;
917    c=0; do {
918       int sign=0;
919       if (ctx->remaining_bits>=1<<BITRES)
920       {
921          if (encode)
922          {
923             sign = x[0]<0;
924             ec_enc_bits(ec, sign, 1);
925          } else {
926             sign = ec_dec_bits(ec, 1);
927          }
928          ctx->remaining_bits -= 1<<BITRES;
929       }
930       if (ctx->resynth)
931          x[0] = sign ? -NORM_SCALING : NORM_SCALING;
932       x = Y;
933    } while (++c<1+stereo);
934    if (lowband_out)
935       lowband_out[0] = SHR16(X[0],4);
936    return 1;
937 }
938 
939 /* This function is responsible for encoding and decoding a mono partition.
940    It can split the band in two and transmit the energy difference with
941    the two half-bands. It can be called recursively so bands can end up being
942    split in 8 parts. */
quant_partition(struct band_ctx * ctx,celt_norm * X,int N,int b,int B,celt_norm * lowband,int LM,opus_val16 gain,int fill)943 static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
944       int N, int b, int B, celt_norm *lowband,
945       int LM,
946       opus_val16 gain, int fill)
947 {
948    const unsigned char *cache;
949    int q;
950    int curr_bits;
951    int imid=0, iside=0;
952    int B0=B;
953    opus_val16 mid=0, side=0;
954    unsigned cm=0;
955    celt_norm *Y=NULL;
956    int encode;
957    const CELTMode *m;
958    int i;
959    int spread;
960    ec_ctx *ec;
961 
962    encode = ctx->encode;
963    m = ctx->m;
964    i = ctx->i;
965    spread = ctx->spread;
966    ec = ctx->ec;
967 
968    /* If we need 1.5 more bit than we can produce, split the band in two. */
969    cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
970    if (LM != -1 && b > cache[cache[0]]+12 && N>2)
971    {
972       int mbits, sbits, delta;
973       int itheta;
974       int qalloc;
975       struct split_ctx sctx;
976       celt_norm *next_lowband2=NULL;
977       opus_int32 rebalance;
978 
979       N >>= 1;
980       Y = X+N;
981       LM -= 1;
982       if (B==1)
983          fill = (fill&1)|(fill<<1);
984       B = (B+1)>>1;
985 
986       compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill);
987       imid = sctx.imid;
988       iside = sctx.iside;
989       delta = sctx.delta;
990       itheta = sctx.itheta;
991       qalloc = sctx.qalloc;
992 #ifdef FIXED_POINT
993       mid = imid;
994       side = iside;
995 #else
996       mid = (1.f/32768)*imid;
997       side = (1.f/32768)*iside;
998 #endif
999 
1000       /* Give more bits to low-energy MDCTs than they would otherwise deserve */
1001       if (B0>1 && (itheta&0x3fff))
1002       {
1003          if (itheta > 8192)
1004             /* Rough approximation for pre-echo masking */
1005             delta -= delta>>(4-LM);
1006          else
1007             /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
1008             delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
1009       }
1010       mbits = IMAX(0, IMIN(b, (b-delta)/2));
1011       sbits = b-mbits;
1012       ctx->remaining_bits -= qalloc;
1013 
1014       if (lowband)
1015          next_lowband2 = lowband+N; /* >32-bit split case */
1016 
1017       rebalance = ctx->remaining_bits;
1018       if (mbits >= sbits)
1019       {
1020          cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
1021                MULT16_16_P15(gain,mid), fill);
1022          rebalance = mbits - (rebalance-ctx->remaining_bits);
1023          if (rebalance > 3<<BITRES && itheta!=0)
1024             sbits += rebalance - (3<<BITRES);
1025          cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1026                MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
1027       } else {
1028          cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1029                MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
1030          rebalance = sbits - (rebalance-ctx->remaining_bits);
1031          if (rebalance > 3<<BITRES && itheta!=16384)
1032             mbits += rebalance - (3<<BITRES);
1033          cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
1034                MULT16_16_P15(gain,mid), fill);
1035       }
1036    } else {
1037       /* This is the basic no-split case */
1038       q = bits2pulses(m, i, LM, b);
1039       curr_bits = pulses2bits(m, i, LM, q);
1040       ctx->remaining_bits -= curr_bits;
1041 
1042       /* Ensures we can never bust the budget */
1043       while (ctx->remaining_bits < 0 && q > 0)
1044       {
1045          ctx->remaining_bits += curr_bits;
1046          q--;
1047          curr_bits = pulses2bits(m, i, LM, q);
1048          ctx->remaining_bits -= curr_bits;
1049       }
1050 
1051       if (q!=0)
1052       {
1053          int K = get_pulses(q);
1054 
1055          /* Finally do the actual quantization */
1056          if (encode)
1057          {
1058             cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch);
1059          } else {
1060             cm = alg_unquant(X, N, K, spread, B, ec, gain);
1061          }
1062       } else {
1063          /* If there's no pulse, fill the band anyway */
1064          int j;
1065          if (ctx->resynth)
1066          {
1067             unsigned cm_mask;
1068             /* B can be as large as 16, so this shift might overflow an int on a
1069                16-bit platform; use a long to get defined behavior.*/
1070             cm_mask = (unsigned)(1UL<<B)-1;
1071             fill &= cm_mask;
1072             if (!fill)
1073             {
1074                OPUS_CLEAR(X, N);
1075             } else {
1076                if (lowband == NULL)
1077                {
1078                   /* Noise */
1079                   for (j=0;j<N;j++)
1080                   {
1081                      ctx->seed = celt_lcg_rand(ctx->seed);
1082                      X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
1083                   }
1084                   cm = cm_mask;
1085                } else {
1086                   /* Folded spectrum */
1087                   for (j=0;j<N;j++)
1088                   {
1089                      opus_val16 tmp;
1090                      ctx->seed = celt_lcg_rand(ctx->seed);
1091                      /* About 48 dB below the "normal" folding level */
1092                      tmp = QCONST16(1.0f/256, 10);
1093                      tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
1094                      X[j] = lowband[j]+tmp;
1095                   }
1096                   cm = fill;
1097                }
1098                renormalise_vector(X, N, gain, ctx->arch);
1099             }
1100          }
1101       }
1102    }
1103 
1104    return cm;
1105 }
1106 
1107 
1108 /* This function is responsible for encoding and decoding a band for the mono case. */
quant_band(struct band_ctx * ctx,celt_norm * X,int N,int b,int B,celt_norm * lowband,int LM,celt_norm * lowband_out,opus_val16 gain,celt_norm * lowband_scratch,int fill)1109 static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
1110       int N, int b, int B, celt_norm *lowband,
1111       int LM, celt_norm *lowband_out,
1112       opus_val16 gain, celt_norm *lowband_scratch, int fill)
1113 {
1114    int N0=N;
1115    int N_B=N;
1116    int N_B0;
1117    int B0=B;
1118    int time_divide=0;
1119    int recombine=0;
1120    int longBlocks;
1121    unsigned cm=0;
1122    int k;
1123    int encode;
1124    int tf_change;
1125 
1126    encode = ctx->encode;
1127    tf_change = ctx->tf_change;
1128 
1129    longBlocks = B0==1;
1130 
1131    N_B = celt_udiv(N_B, B);
1132 
1133    /* Special case for one sample */
1134    if (N==1)
1135    {
1136       return quant_band_n1(ctx, X, NULL, lowband_out);
1137    }
1138 
1139    if (tf_change>0)
1140       recombine = tf_change;
1141    /* Band recombining to increase frequency resolution */
1142 
1143    if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
1144    {
1145       OPUS_COPY(lowband_scratch, lowband, N);
1146       lowband = lowband_scratch;
1147    }
1148 
1149    for (k=0;k<recombine;k++)
1150    {
1151       static const unsigned char bit_interleave_table[16]={
1152             0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
1153       };
1154       if (encode)
1155          haar1(X, N>>k, 1<<k);
1156       if (lowband)
1157          haar1(lowband, N>>k, 1<<k);
1158       fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
1159    }
1160    B>>=recombine;
1161    N_B<<=recombine;
1162 
1163    /* Increasing the time resolution */
1164    while ((N_B&1) == 0 && tf_change<0)
1165    {
1166       if (encode)
1167          haar1(X, N_B, B);
1168       if (lowband)
1169          haar1(lowband, N_B, B);
1170       fill |= fill<<B;
1171       B <<= 1;
1172       N_B >>= 1;
1173       time_divide++;
1174       tf_change++;
1175    }
1176    B0=B;
1177    N_B0 = N_B;
1178 
1179    /* Reorganize the samples in time order instead of frequency order */
1180    if (B0>1)
1181    {
1182       if (encode)
1183          deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1184       if (lowband)
1185          deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
1186    }
1187 
1188    cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill);
1189 
1190    /* This code is used by the decoder and by the resynthesis-enabled encoder */
1191    if (ctx->resynth)
1192    {
1193       /* Undo the sample reorganization going from time order to frequency order */
1194       if (B0>1)
1195          interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1196 
1197       /* Undo time-freq changes that we did earlier */
1198       N_B = N_B0;
1199       B = B0;
1200       for (k=0;k<time_divide;k++)
1201       {
1202          B >>= 1;
1203          N_B <<= 1;
1204          cm |= cm>>B;
1205          haar1(X, N_B, B);
1206       }
1207 
1208       for (k=0;k<recombine;k++)
1209       {
1210          static const unsigned char bit_deinterleave_table[16]={
1211                0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
1212                0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
1213          };
1214          cm = bit_deinterleave_table[cm];
1215          haar1(X, N0>>k, 1<<k);
1216       }
1217       B<<=recombine;
1218 
1219       /* Scale output for later folding */
1220       if (lowband_out)
1221       {
1222          int j;
1223          opus_val16 n;
1224          n = celt_sqrt(SHL32(EXTEND32(N0),22));
1225          for (j=0;j<N0;j++)
1226             lowband_out[j] = MULT16_16_Q15(n,X[j]);
1227       }
1228       cm &= (1<<B)-1;
1229    }
1230    return cm;
1231 }
1232 
1233 
1234 /* This function is responsible for encoding and decoding a band for the stereo case. */
quant_band_stereo(struct band_ctx * ctx,celt_norm * X,celt_norm * Y,int N,int b,int B,celt_norm * lowband,int LM,celt_norm * lowband_out,celt_norm * lowband_scratch,int fill)1235 static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
1236       int N, int b, int B, celt_norm *lowband,
1237       int LM, celt_norm *lowband_out,
1238       celt_norm *lowband_scratch, int fill)
1239 {
1240    int imid=0, iside=0;
1241    int inv = 0;
1242    opus_val16 mid=0, side=0;
1243    unsigned cm=0;
1244    int mbits, sbits, delta;
1245    int itheta;
1246    int qalloc;
1247    struct split_ctx sctx;
1248    int orig_fill;
1249    int encode;
1250    ec_ctx *ec;
1251 
1252    encode = ctx->encode;
1253    ec = ctx->ec;
1254 
1255    /* Special case for one sample */
1256    if (N==1)
1257    {
1258       return quant_band_n1(ctx, X, Y, lowband_out);
1259    }
1260 
1261    orig_fill = fill;
1262 
1263    compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill);
1264    inv = sctx.inv;
1265    imid = sctx.imid;
1266    iside = sctx.iside;
1267    delta = sctx.delta;
1268    itheta = sctx.itheta;
1269    qalloc = sctx.qalloc;
1270 #ifdef FIXED_POINT
1271    mid = imid;
1272    side = iside;
1273 #else
1274    mid = (1.f/32768)*imid;
1275    side = (1.f/32768)*iside;
1276 #endif
1277 
1278    /* This is a special case for N=2 that only works for stereo and takes
1279       advantage of the fact that mid and side are orthogonal to encode
1280       the side with just one bit. */
1281    if (N==2)
1282    {
1283       int c;
1284       int sign=0;
1285       celt_norm *x2, *y2;
1286       mbits = b;
1287       sbits = 0;
1288       /* Only need one bit for the side. */
1289       if (itheta != 0 && itheta != 16384)
1290          sbits = 1<<BITRES;
1291       mbits -= sbits;
1292       c = itheta > 8192;
1293       ctx->remaining_bits -= qalloc+sbits;
1294 
1295       x2 = c ? Y : X;
1296       y2 = c ? X : Y;
1297       if (sbits)
1298       {
1299          if (encode)
1300          {
1301             /* Here we only need to encode a sign for the side. */
1302             sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
1303             ec_enc_bits(ec, sign, 1);
1304          } else {
1305             sign = ec_dec_bits(ec, 1);
1306          }
1307       }
1308       sign = 1-2*sign;
1309       /* We use orig_fill here because we want to fold the side, but if
1310          itheta==16384, we'll have cleared the low bits of fill. */
1311       cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1312             lowband_scratch, orig_fill);
1313       /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1314          and there's no need to worry about mixing with the other channel. */
1315       y2[0] = -sign*x2[1];
1316       y2[1] = sign*x2[0];
1317       if (ctx->resynth)
1318       {
1319          celt_norm tmp;
1320          X[0] = MULT16_16_Q15(mid, X[0]);
1321          X[1] = MULT16_16_Q15(mid, X[1]);
1322          Y[0] = MULT16_16_Q15(side, Y[0]);
1323          Y[1] = MULT16_16_Q15(side, Y[1]);
1324          tmp = X[0];
1325          X[0] = SUB16(tmp,Y[0]);
1326          Y[0] = ADD16(tmp,Y[0]);
1327          tmp = X[1];
1328          X[1] = SUB16(tmp,Y[1]);
1329          Y[1] = ADD16(tmp,Y[1]);
1330       }
1331    } else {
1332       /* "Normal" split code */
1333       opus_int32 rebalance;
1334 
1335       mbits = IMAX(0, IMIN(b, (b-delta)/2));
1336       sbits = b-mbits;
1337       ctx->remaining_bits -= qalloc;
1338 
1339       rebalance = ctx->remaining_bits;
1340       if (mbits >= sbits)
1341       {
1342          /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1343             mid for folding later. */
1344          cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1345                lowband_scratch, fill);
1346          rebalance = mbits - (rebalance-ctx->remaining_bits);
1347          if (rebalance > 3<<BITRES && itheta!=0)
1348             sbits += rebalance - (3<<BITRES);
1349 
1350          /* For a stereo split, the high bits of fill are always zero, so no
1351             folding will be done to the side. */
1352          cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
1353       } else {
1354          /* For a stereo split, the high bits of fill are always zero, so no
1355             folding will be done to the side. */
1356          cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
1357          rebalance = sbits - (rebalance-ctx->remaining_bits);
1358          if (rebalance > 3<<BITRES && itheta!=16384)
1359             mbits += rebalance - (3<<BITRES);
1360          /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1361             mid for folding later. */
1362          cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1363                lowband_scratch, fill);
1364       }
1365    }
1366 
1367 
1368    /* This code is used by the decoder and by the resynthesis-enabled encoder */
1369    if (ctx->resynth)
1370    {
1371       if (N!=2)
1372          stereo_merge(X, Y, mid, N, ctx->arch);
1373       if (inv)
1374       {
1375          int j;
1376          for (j=0;j<N;j++)
1377             Y[j] = -Y[j];
1378       }
1379    }
1380    return cm;
1381 }
1382 
1383 #ifndef DISABLE_UPDATE_DRAFT
special_hybrid_folding(const CELTMode * m,celt_norm * norm,celt_norm * norm2,int start,int M,int dual_stereo)1384 static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
1385 {
1386    int n1, n2;
1387    const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1388    n1 = M*(eBands[start+1]-eBands[start]);
1389    n2 = M*(eBands[start+2]-eBands[start+1]);
1390    /* Duplicate enough of the first band folding data to be able to fold the second band.
1391       Copies no data for CELT-only mode. */
1392    OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
1393    if (dual_stereo)
1394       OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
1395 }
1396 #endif
1397 
quant_all_bands(int encode,const CELTMode * m,int start,int end,celt_norm * X_,celt_norm * Y_,unsigned char * collapse_masks,const celt_ener * bandE,int * pulses,int shortBlocks,int spread,int dual_stereo,int intensity,int * tf_res,opus_int32 total_bits,opus_int32 balance,ec_ctx * ec,int LM,int codedBands,opus_uint32 * seed,int complexity,int arch,int disable_inv)1398 void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1399       celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
1400       const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
1401       int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
1402       opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
1403       opus_uint32 *seed, int complexity, int arch, int disable_inv)
1404 {
1405    int i;
1406    opus_int32 remaining_bits;
1407    const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1408    celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
1409    VARDECL(celt_norm, _norm);
1410    VARDECL(celt_norm, _lowband_scratch);
1411    VARDECL(celt_norm, X_save);
1412    VARDECL(celt_norm, Y_save);
1413    VARDECL(celt_norm, X_save2);
1414    VARDECL(celt_norm, Y_save2);
1415    VARDECL(celt_norm, norm_save2);
1416    int resynth_alloc;
1417    celt_norm *lowband_scratch;
1418    int B;
1419    int M;
1420    int lowband_offset;
1421    int update_lowband = 1;
1422    int C = Y_ != NULL ? 2 : 1;
1423    int norm_offset;
1424    int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
1425 #ifdef RESYNTH
1426    int resynth = 1;
1427 #else
1428    int resynth = !encode || theta_rdo;
1429 #endif
1430    struct band_ctx ctx;
1431    SAVE_STACK;
1432 
1433    M = 1<<LM;
1434    B = shortBlocks ? M : 1;
1435    norm_offset = M*eBands[start];
1436    /* No need to allocate norm for the last band because we don't need an
1437       output in that band. */
1438    ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
1439    norm = _norm;
1440    norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
1441 
1442    /* For decoding, we can use the last band as scratch space because we don't need that
1443       scratch space for the last band and we don't care about the data there until we're
1444       decoding the last band. */
1445    if (encode && resynth)
1446       resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
1447    else
1448       resynth_alloc = ALLOC_NONE;
1449    ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
1450    if (encode && resynth)
1451       lowband_scratch = _lowband_scratch;
1452    else
1453       lowband_scratch = X_+M*eBands[m->nbEBands-1];
1454    ALLOC(X_save, resynth_alloc, celt_norm);
1455    ALLOC(Y_save, resynth_alloc, celt_norm);
1456    ALLOC(X_save2, resynth_alloc, celt_norm);
1457    ALLOC(Y_save2, resynth_alloc, celt_norm);
1458    ALLOC(norm_save2, resynth_alloc, celt_norm);
1459 
1460    lowband_offset = 0;
1461    ctx.bandE = bandE;
1462    ctx.ec = ec;
1463    ctx.encode = encode;
1464    ctx.intensity = intensity;
1465    ctx.m = m;
1466    ctx.seed = *seed;
1467    ctx.spread = spread;
1468    ctx.arch = arch;
1469    ctx.disable_inv = disable_inv;
1470    ctx.resynth = resynth;
1471    ctx.theta_round = 0;
1472    /* Avoid injecting noise in the first band on transients. */
1473    ctx.avoid_split_noise = B > 1;
1474    for (i=start;i<end;i++)
1475    {
1476       opus_int32 tell;
1477       int b;
1478       int N;
1479       opus_int32 curr_balance;
1480       int effective_lowband=-1;
1481       celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
1482       int tf_change=0;
1483       unsigned x_cm;
1484       unsigned y_cm;
1485       int last;
1486 
1487       ctx.i = i;
1488       last = (i==end-1);
1489 
1490       X = X_+M*eBands[i];
1491       if (Y_!=NULL)
1492          Y = Y_+M*eBands[i];
1493       else
1494          Y = NULL;
1495       N = M*eBands[i+1]-M*eBands[i];
1496       celt_assert(N > 0);
1497       tell = ec_tell_frac(ec);
1498 
1499       /* Compute how many bits we want to allocate to this band */
1500       if (i != start)
1501          balance -= tell;
1502       remaining_bits = total_bits-tell-1;
1503       ctx.remaining_bits = remaining_bits;
1504       if (i <= codedBands-1)
1505       {
1506          curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
1507          b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
1508       } else {
1509          b = 0;
1510       }
1511 
1512 #ifndef DISABLE_UPDATE_DRAFT
1513       if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
1514             lowband_offset = i;
1515       if (i == start+1)
1516          special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1517 #else
1518       if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
1519             lowband_offset = i;
1520 #endif
1521 
1522       tf_change = tf_res[i];
1523       ctx.tf_change = tf_change;
1524       if (i>=m->effEBands)
1525       {
1526          X=norm;
1527          if (Y_!=NULL)
1528             Y = norm;
1529          lowband_scratch = NULL;
1530       }
1531       if (last && !theta_rdo)
1532          lowband_scratch = NULL;
1533 
1534       /* Get a conservative estimate of the collapse_mask's for the bands we're
1535          going to be folding from. */
1536       if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
1537       {
1538          int fold_start;
1539          int fold_end;
1540          int fold_i;
1541          /* This ensures we never repeat spectral content within one band */
1542          effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
1543          fold_start = lowband_offset;
1544          while(M*eBands[--fold_start] > effective_lowband+norm_offset);
1545          fold_end = lowband_offset-1;
1546 #ifndef DISABLE_UPDATE_DRAFT
1547          while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
1548 #else
1549          while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
1550 #endif
1551          x_cm = y_cm = 0;
1552          fold_i = fold_start; do {
1553            x_cm |= collapse_masks[fold_i*C+0];
1554            y_cm |= collapse_masks[fold_i*C+C-1];
1555          } while (++fold_i<fold_end);
1556       }
1557       /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1558          always) be non-zero. */
1559       else
1560          x_cm = y_cm = (1<<B)-1;
1561 
1562       if (dual_stereo && i==intensity)
1563       {
1564          int j;
1565 
1566          /* Switch off dual stereo to do intensity. */
1567          dual_stereo = 0;
1568          if (resynth)
1569             for (j=0;j<M*eBands[i]-norm_offset;j++)
1570                norm[j] = HALF32(norm[j]+norm2[j]);
1571       }
1572       if (dual_stereo)
1573       {
1574          x_cm = quant_band(&ctx, X, N, b/2, B,
1575                effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1576                last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
1577          y_cm = quant_band(&ctx, Y, N, b/2, B,
1578                effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
1579                last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
1580       } else {
1581          if (Y!=NULL)
1582          {
1583             if (theta_rdo && i < intensity)
1584             {
1585                ec_ctx ec_save, ec_save2;
1586                struct band_ctx ctx_save, ctx_save2;
1587                opus_val32 dist0, dist1;
1588                unsigned cm, cm2;
1589                int nstart_bytes, nend_bytes, save_bytes;
1590                unsigned char *bytes_buf;
1591                unsigned char bytes_save[1275];
1592                opus_val16 w[2];
1593                compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
1594                /* Make a copy. */
1595                cm = x_cm|y_cm;
1596                ec_save = *ec;
1597                ctx_save = ctx;
1598                OPUS_COPY(X_save, X, N);
1599                OPUS_COPY(Y_save, Y, N);
1600                /* Encode and round down. */
1601                ctx.theta_round = -1;
1602                x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1603                      effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1604                      last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
1605                dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
1606 
1607                /* Save first result. */
1608                cm2 = x_cm;
1609                ec_save2 = *ec;
1610                ctx_save2 = ctx;
1611                OPUS_COPY(X_save2, X, N);
1612                OPUS_COPY(Y_save2, Y, N);
1613                if (!last)
1614                   OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
1615                nstart_bytes = ec_save.offs;
1616                nend_bytes = ec_save.storage;
1617                bytes_buf = ec_save.buf+nstart_bytes;
1618                save_bytes = nend_bytes-nstart_bytes;
1619                OPUS_COPY(bytes_save, bytes_buf, save_bytes);
1620 
1621                /* Restore */
1622                *ec = ec_save;
1623                ctx = ctx_save;
1624                OPUS_COPY(X, X_save, N);
1625                OPUS_COPY(Y, Y_save, N);
1626 #ifndef DISABLE_UPDATE_DRAFT
1627                if (i == start+1)
1628                   special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1629 #endif
1630                /* Encode and round up. */
1631                ctx.theta_round = 1;
1632                x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1633                      effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1634                      last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
1635                dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
1636                if (dist0 >= dist1) {
1637                   x_cm = cm2;
1638                   *ec = ec_save2;
1639                   ctx = ctx_save2;
1640                   OPUS_COPY(X, X_save2, N);
1641                   OPUS_COPY(Y, Y_save2, N);
1642                   if (!last)
1643                      OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
1644                   OPUS_COPY(bytes_buf, bytes_save, save_bytes);
1645                }
1646             } else {
1647                ctx.theta_round = 0;
1648                x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1649                      effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1650                      last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
1651             }
1652          } else {
1653             x_cm = quant_band(&ctx, X, N, b, B,
1654                   effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1655                   last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
1656          }
1657          y_cm = x_cm;
1658       }
1659       collapse_masks[i*C+0] = (unsigned char)x_cm;
1660       collapse_masks[i*C+C-1] = (unsigned char)y_cm;
1661       balance += pulses[i] + tell;
1662 
1663       /* Update the folding position only as long as we have 1 bit/sample depth. */
1664       update_lowband = b>(N<<BITRES);
1665       /* We only need to avoid noise on a split for the first band. After that, we
1666          have folding. */
1667       ctx.avoid_split_noise = 0;
1668    }
1669    *seed = ctx.seed;
1670 
1671    RESTORE_STACK;
1672 }
1673 
1674