/* Copyright (c) 2007-2008 CSIRO
   Copyright (c) 2007-2009 Xiph.Org Foundation
   Written by Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "mathops.h"
#include "cwrs.h"
#include "vq.h"
#include "arch.h"
#include "os_support.h"
#include "bands.h"
#include "rate.h"

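/* Apply a sequence of 2-D rotations (cos=c, sin=s) to pairs of samples that
   are `stride` apart: a forward pass over the vector, then a partial backward
   pass, so the rotation's effect is spread across the whole vector. */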
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;
   celt_norm *Xptr;
   Xptr = X;
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
      *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
   }
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
      *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
   }
}

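/* Rotate the vector as a cheap substitute for a true random rotation, so that
   a small number of pulses does not stay concentrated in a few bins. The
   rotation angle theta shrinks as the pulse density K/len grows, since dense
   vectors need less spreading. */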
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
   static const int SPREAD_FACTOR[3]={15,10,5};
   int i;
   opus_val16 c, s;
   opus_val16 gain, theta;
   int stride2=0;
   int factor;

   if (2*K>=len || spread==SPREAD_NONE)
      return;
   factor = SPREAD_FACTOR[spread-1];

   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
   theta = HALF16(MULT16_16_Q15(gain,gain));

   c = celt_cos_norm(EXTEND32(theta));
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */

   if (len>=8*stride)
   {
      stride2 = 1;
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
         It's basically incrementing stride2 as long as (stride2+0.5)^2 < len/stride. */
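      /* Multiplying the test out: (stride2+0.5)^2 < len/stride
         <=> (stride2*stride2+stride2)*stride + stride/4 < len,
         which is exactly the integer condition the loop below checks. */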
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
         stride2++;
   }
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      extract_collapse_mask().*/
   len /= stride;
   for (i=0;i<stride;i++)
   {
      if (dir < 0)
      {
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, c);
         exp_rotation1(X+i*len, len, 1, c, s);
      } else {
         exp_rotation1(X+i*len, len, 1, c, -s);
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, -c);
      }
   }
}

/** Takes the decoded pulse vector iy and the band gain, computes
    g = gain/sqrt(Ryy) and writes the normalised result g*iy into X. */
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val16 gain)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val16 g;

#ifdef FIXED_POINT
   k = celt_ilog2(Ryy)>>1;
#endif
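   /* k is roughly log2(sqrt(Ryy)), so the shift below brings Ryy into the
      normalised range expected by celt_rsqrt_norm(); the k+1 shift applied
      to each sample afterwards compensates for this scaling. */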
   t = VSHR32(Ryy, 2*(k-7));
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   i=0;
   do
      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
   while (++i < N);
}

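/* Build a bitmask with bit i set when the i-th of the B blocks of N/B samples
   contains at least one pulse; a cleared bit flags a collapsed sub-block for
   the anti-collapse logic. */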
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned collapse_mask;
   int N0;
   int i;
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   N0 = N/B;
   collapse_mask = 0;
   i=0; do {
      int j;
      j=0; do {
         collapse_mask |= (iy[i*N0+j]!=0)<<i;
      } while (++j<N0);
   } while (++i<B);
   return collapse_mask;
}

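/* Encoder-side PVQ search: find the integer vector iy with sum(abs(iy[j]))==K
   that (approximately) maximises the correlation with the spread input X, then
   entropy-code its index with encode_pulses(). Returns the collapse mask. */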
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
#ifdef RESYNTH
   , opus_val16 gain
#endif
   )
{
   VARDECL(celt_norm, y);
   VARDECL(int, iy);
   VARDECL(opus_val16, signx);
   int i, j;
   opus_val16 s;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;
   opus_val16 yy;
   unsigned collapse_mask;
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   ALLOC(y, N, celt_norm);
   ALLOC(iy, N, int);
   ALLOC(signx, N, opus_val16);

   exp_rotation(X, N, 1, B, K, spread);

   /* Get rid of the sign */
   sum = 0;
   j=0; do {
      if (X[j]>0)
         signx[j]=1;
      else {
         signx[j]=-1;
         X[j]=-X[j];
      }
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid */
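   /* When K is large relative to N, most pulses can be placed in a single
      step by scaling X to an L1 norm of about K-1 and truncating; only the
      handful of remaining pulses go through the greedy search below. */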
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      } while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
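      /* rcp is (K-1)/sum, and the per-sample products below round towards
         zero, so this pass places at most K-1 pulses in total, leaving at
         least one for the main search loop (see the assertion below). */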
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         y[j] *= 2;
         pulsesLeft -= iy[j];
      } while (++j<N);
   }
   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

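   /* Greedy search: each iteration adds the one pulse that maximises the
      score (xy+X[j])^2 / (yy+2*y[j]+1). The comparison is kept in the
      cross-multiplied form best_den*Rxy > Ryy*best_num to avoid divisions. */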
   s = 1;
   for (i=0;i<pulsesLeft;i++)
   {
      int best_id;
      opus_val32 best_num = -VERY_LARGE16;
      opus_val16 best_den = 0;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD32(yy, 1);
      j=0;
      do {
         opus_val16 Rxy, Ryy;
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* The idea is to check for num/den >= best_num/best_den, but that way
            we can do it without any division */
         /* OPT: Make sure to use conditional moves here */
         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2*s;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      X[j] = MULT16_16(signx[j],X[j]);
      if (signx[j] < 0)
         iy[j] = -iy[j];
   } while (++j<N);
   encode_pulses(iy, N, K, enc);

#ifdef RESYNTH
   normalise_residual(iy, X, N, yy, gain);
   exp_rotation(X, N, -1, B, K, spread);
#endif

   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}

/** Decode the pulse vector and scale it by the appropriate gain to produce
    the final normalised signal in the current band. */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val16 gain)
{
   int i;
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
   decode_pulses(iy, N, K, dec);
   Ryy = 0;
   i=0;
   do {
      Ryy = MAC16_16(Ryy, iy[i], iy[i]);
   } while (++i < N);
   normalise_residual(iy, X, N, Ryy, gain);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}

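/* Scale X so that its L2 norm becomes `gain`: X <- gain*X/sqrt(E), where E is
   the energy of X (plus EPSILON to avoid dividing by zero). */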
void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E = EPSILON;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr = X;
   for (i=0;i<N;i++)
   {
      E = MAC16_16(E, *xptr, *xptr);
      xptr++;
   }
#ifdef FIXED_POINT
   k = celt_ilog2(E)>>1;
#endif
   t = VSHR32(E, 2*(k-7));
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
      xptr++;
   }
   /*return celt_sqrt(E);*/
}

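/* Compute the mid/side angle of a stereo pair as a Q14 value:
   itheta = floor(.5 + 16384*(2/pi)*atan2(||side||, ||mid||)), so 0 means all
   of the energy is in the mid and 16384 means it is all in the side. When
   `stereo` is set, X/Y hold left/right and mid/side are derived here;
   otherwise X/Y are already mid/side. */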
int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
{
   int i;
   int itheta;
   opus_val16 mid, side;
   opus_val32 Emid, Eside;

   Emid = Eside = EPSILON;
   if (stereo)
   {
      for (i=0;i<N;i++)
      {
         celt_norm m, s;
         m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
         s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
         Emid = MAC16_16(Emid, m, m);
         Eside = MAC16_16(Eside, s, s);
      }
   } else {
      for (i=0;i<N;i++)
      {
         celt_norm m, s;
         m = X[i];
         s = Y[i];
         Emid = MAC16_16(Emid, m, m);
         Eside = MAC16_16(Eside, s, s);
      }
   }
   mid = celt_sqrt(Emid);
   side = celt_sqrt(Eside);
#ifdef FIXED_POINT
   /* 0.63662 = 2/pi */
   itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
#else
   itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
#endif

   return itheta;
}