/* Copyright (c) 2007-2008 CSIRO
   Copyright (c) 2007-2009 Xiph.Org Foundation
   Written by Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "mathops.h"
#include "cwrs.h"
#include "vq.h"
#include "arch.h"
#include "os_support.h"
#include "bands.h"
#include "rate.h"
#include "pitch.h"

#ifndef OVERRIDE_vq_exp_rotation1
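/** Applies a sequence of 2-D rotations with cosine c and sine s to pairs of
    coefficients that are `stride` apart: one forward pass followed by one
    backward pass, so every coefficient ends up mixed with its neighbours. */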
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;
   opus_val16 ms;
   celt_norm *Xptr;
   Xptr = X;
   ms = NEG16(s);
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
}
#endif /* OVERRIDE_vq_exp_rotation1 */

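/** Spreading rotation applied around the PVQ search (dir = +1 applies it,
    dir = -1 undoes it): rotates the band so the energy of the K pulses is
    spread over neighbouring samples instead of staying in isolated spikes. */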
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
   static const int SPREAD_FACTOR[3]={15,10,5};
   int i;
   opus_val16 c, s;
   opus_val16 gain, theta;
   int stride2=0;
   int factor;

   if (2*K>=len || spread==SPREAD_NONE)
      return;
   factor = SPREAD_FACTOR[spread-1];

   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
   theta = HALF16(MULT16_16_Q15(gain,gain));
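   /* gain = len/(len+factor*K) in Q15 and theta = gain^2/2: the rotation
      angle (pi/2)*theta shrinks as the pulse density K/len grows, since a
      dense vector needs less spreading. */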

   c = celt_cos_norm(EXTEND32(theta));
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */

   if (len>=8*stride)
   {
      stride2 = 1;
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
         It's basically incrementing stride2 as long as (stride2+0.5)^2 < len/stride, which
         rearranges to (stride2^2+stride2)*stride + stride/4 < len (stride>>2 approximates
         the stride/4 term). */
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
         stride2++;
   }
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
     extract_collapse_mask().*/
   len = celt_udiv(len, stride);
   for (i=0;i<stride;i++)
   {
      if (dir < 0)
      {
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, c);
         exp_rotation1(X+i*len, len, 1, c, s);
      } else {
         exp_rotation1(X+i*len, len, 1, c, -s);
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, -c);
      }
   }
}

/** Takes the decoded pulse vector iy (with squared norm Ryy) and scales it by
    g = gain/sqrt(Ryy), storing the result in X so that ||X|| = gain. */
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val16 gain)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val16 g;

#ifdef FIXED_POINT
   k = celt_ilog2(Ryy)>>1;
#endif
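   /* In fixed point, Ryy is shifted into the input range of celt_rsqrt_norm()
      and the PSHR32(..., k+1) in the loop below undoes that scaling; in float
      builds both shifts are no-ops. */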
   t = VSHR32(Ryy, 2*(k-7));
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   i=0;
   do
      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
   while (++i < N);
}

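/** Returns a B-bit mask with bit i set if the i-th block of N/B consecutive
    samples contains at least one pulse, so the anti-collapse logic can tell
    which time blocks of the band went to zero. */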
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned collapse_mask;
   int N0;
   int i;
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
     exp_rotation().*/
   N0 = celt_udiv(N, B);
   collapse_mask = 0;
   i=0; do {
      int j;
      unsigned tmp=0;
      j=0; do {
         tmp |= iy[i*N0+j];
      } while (++j<N0);
      collapse_mask |= (tmp!=0)<<i;
   } while (++i<B);
   return collapse_mask;
}

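/** Quantises X as a combination of K signed pulses (PVQ): a projection
    pre-search followed by a greedy one-pulse-at-a-time refinement, then
    entropy-codes the pulse vector with encode_pulses() and returns the
    collapse mask. */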
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
#ifdef RESYNTH
   , opus_val16 gain
#endif
   )
{
   VARDECL(celt_norm, y);
   VARDECL(int, iy);
   VARDECL(opus_val16, signx);
   int i, j;
   opus_val16 s;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;
   opus_val16 yy;
   unsigned collapse_mask;
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   ALLOC(y, N, celt_norm);
   ALLOC(iy, N, int);
   ALLOC(signx, N, opus_val16);

   exp_rotation(X, N, 1, B, K, spread);

   /* Get rid of the sign */
   sum = 0;
   j=0; do {
      if (X[j]>0)
         signx[j]=1;
      else {
         signx[j]=-1;
         X[j]=-X[j];
      }
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid */
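   /* The projection places roughly (K-1)*X[j]/sum pulses at each position;
      using K-1 and rounding towards zero guarantees that at least one pulse
      is left over for the greedy search below. */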
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      }  while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         y[j] *= 2;
         pulsesLeft -= iy[j];
      }  while (++j<N);
   }
   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

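   /* Greedy search: each iteration adds the single pulse that maximises
      (xy + X[j])^2 / (yy + 1 + y[j]).  Since y[] is stored pre-doubled,
      yy + y[j] + 1 is exactly the energy after adding a pulse at j; the
      comparison is done by cross-multiplication to avoid a division. */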
   s = 1;
   for (i=0;i<pulsesLeft;i++)
   {
      int best_id;
      opus_val32 best_num = -VERY_LARGE16;
      opus_val16 best_den = 0;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD16(yy, 1);
      j=0;
      do {
         opus_val16 Rxy, Ryy;
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* The idea is to check for num/den >= best_num/best_den, but that way
            we can do it without any division */
         /* OPT: Make sure to use conditional moves here */
         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2*s;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      X[j] = MULT16_16(signx[j],X[j]);
      if (signx[j] < 0)
         iy[j] = -iy[j];
   } while (++j<N);
   encode_pulses(iy, N, K, enc);

#ifdef RESYNTH
   normalise_residual(iy, X, N, yy, gain);
   exp_rotation(X, N, -1, B, K, spread);
#endif

   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}

/** Decodes the pulse vector and renormalises it to the target gain, producing
    the final normalised signal in the current band. */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val16 gain)
{
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
   Ryy = decode_pulses(iy, N, K, dec);
   normalise_residual(iy, X, N, Ryy, gain);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}

#ifndef OVERRIDE_renormalise_vector
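/** Rescales X in place so that its L2 norm becomes (approximately) gain,
    using the same reciprocal-sqrt scheme as normalise_residual(). */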
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr;
   E = EPSILON + celt_inner_prod(X, X, N, arch);
#ifdef FIXED_POINT
   k = celt_ilog2(E)>>1;
#endif
   t = VSHR32(E, 2*(k-7));
   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
      xptr++;
   }
   /*return celt_sqrt(E);*/
}
#endif /* OVERRIDE_renormalise_vector */

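/** Measures the mid/side energy split of a band as an angle:
    itheta = floor(0.5 + 16384*(2/pi)*atan2(||side||, ||mid||)),
    i.e. Q14 with 0 meaning all mid and 16384 meaning all side. */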
int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
{
   int i;
   int itheta;
   opus_val16 mid, side;
   opus_val32 Emid, Eside;

   Emid = Eside = EPSILON;
   if (stereo)
   {
      for (i=0;i<N;i++)
      {
         celt_norm m, s;
         m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
         s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
         Emid = MAC16_16(Emid, m, m);
         Eside = MAC16_16(Eside, s, s);
      }
   } else {
      Emid += celt_inner_prod(X, X, N, arch);
      Eside += celt_inner_prod(Y, Y, N, arch);
   }
   mid = celt_sqrt(Emid);
   side = celt_sqrt(Eside);
#ifdef FIXED_POINT
   /* 0.63662 = 2/pi */
   itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
#else
   itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
#endif

   return itheta;
}