1 /* Copyright (c) 2009-2010 Xiph.Org Foundation
2 Written by Jean-Marc Valin */
3 /*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "celt_lpc.h"
33 #include "stack_alloc.h"
34 #include "mathops.h"
35 #include "pitch.h"
36
_celt_lpc(opus_val16 * _lpc,const opus_val32 * ac,int p)37 void _celt_lpc(
38 opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */
39 const opus_val32 *ac, /* in: [0...p] autocorrelation values */
40 int p
41 )
42 {
43 int i, j;
44 opus_val32 r;
45 opus_val32 error = ac[0];
46 #ifdef FIXED_POINT
47 opus_val32 lpc[LPC_ORDER];
48 #else
49 float *lpc = _lpc;
50 #endif
51
52 OPUS_CLEAR(lpc, p);
53 #ifdef FIXED_POINT
54 if (ac[0] != 0)
55 #else
56 if (ac[0] > 1e-10f)
57 #endif
58 {
59 for (i = 0; i < p; i++) {
60 /* Sum up this iteration's reflection coefficient */
61 opus_val32 rr = 0;
62 for (j = 0; j < i; j++)
63 rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64 rr += SHR32(ac[i + 1],6);
65 r = -frac_div32(SHL32(rr,6), error);
66 /* Update LPC coefficients and total error */
67 lpc[i] = SHR32(r,6);
68 for (j = 0; j < (i+1)>>1; j++)
69 {
70 opus_val32 tmp1, tmp2;
71 tmp1 = lpc[j];
72 tmp2 = lpc[i-1-j];
73 lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2);
74 lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75 }
76
77 error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78 /* Bail out once we get 30 dB gain */
79 #ifdef FIXED_POINT
80 if (error<=SHR32(ac[0],10))
81 break;
82 #else
83 if (error<=.001f*ac[0])
84 break;
85 #endif
86 }
87 }
88 #ifdef FIXED_POINT
89 {
90 /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91 This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92 fixes should also be applied there. */
93 int iter, idx = 0;
94 opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95
96 for (iter = 0; iter < 10; iter++) {
97 maxabs = 0;
98 for (i = 0; i < p; i++) {
99 absval = ABS32(lpc[i]);
100 if (absval > maxabs) {
101 maxabs = absval;
102 idx = i;
103 }
104 }
105 maxabs = PSHR32(maxabs, 13); /* Q25->Q12 */
106
107 if (maxabs > 32767) {
108 maxabs = MIN32(maxabs, 163838);
109 chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110 SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111 chirp_minus_one_Q16 = chirp_Q16 - 65536;
112
113 /* Apply bandwidth expansion. */
114 for (i = 0; i < p - 1; i++) {
115 lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116 chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117 }
118 lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119 } else {
120 break;
121 }
122 }
123
124 if (iter == 10) {
125 /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126 fall back to the A(z)=1 filter. */
127 OPUS_CLEAR(lpc, p);
128 _lpc[0] = 4096; /* Q12 */
129 } else {
130 for (i = 0; i < p; i++) {
131 _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13)); /* Q25->Q12 */
132 }
133 }
134 }
135 #endif
136 }
137
138
celt_fir_c(const opus_val16 * x,const opus_val16 * num,opus_val16 * y,int N,int ord,int arch)139 void celt_fir_c(
140 const opus_val16 *x,
141 const opus_val16 *num,
142 opus_val16 *y,
143 int N,
144 int ord,
145 int arch)
146 {
147 int i,j;
148 VARDECL(opus_val16, rnum);
149 SAVE_STACK;
150 celt_assert(x != y);
151 ALLOC(rnum, ord, opus_val16);
152 for(i=0;i<ord;i++)
153 rnum[i] = num[ord-i-1];
154 for (i=0;i<N-3;i+=4)
155 {
156 opus_val32 sum[4];
157 sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT);
158 sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
159 sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
160 sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
161 xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
162 y[i ] = SROUND16(sum[0], SIG_SHIFT);
163 y[i+1] = SROUND16(sum[1], SIG_SHIFT);
164 y[i+2] = SROUND16(sum[2], SIG_SHIFT);
165 y[i+3] = SROUND16(sum[3], SIG_SHIFT);
166 }
167 for (;i<N;i++)
168 {
169 opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
170 for (j=0;j<ord;j++)
171 sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
172 y[i] = SROUND16(sum, SIG_SHIFT);
173 }
174 RESTORE_STACK;
175 }
176
celt_iir(const opus_val32 * _x,const opus_val16 * den,opus_val32 * _y,int N,int ord,opus_val16 * mem,int arch)177 void celt_iir(const opus_val32 *_x,
178 const opus_val16 *den,
179 opus_val32 *_y,
180 int N,
181 int ord,
182 opus_val16 *mem,
183 int arch)
184 {
185 #ifdef SMALL_FOOTPRINT
186 int i,j;
187 (void)arch;
188 for (i=0;i<N;i++)
189 {
190 opus_val32 sum = _x[i];
191 for (j=0;j<ord;j++)
192 {
193 sum -= MULT16_16(den[j],mem[j]);
194 }
195 for (j=ord-1;j>=1;j--)
196 {
197 mem[j]=mem[j-1];
198 }
199 mem[0] = SROUND16(sum, SIG_SHIFT);
200 _y[i] = sum;
201 }
202 #else
203 int i,j;
204 VARDECL(opus_val16, rden);
205 VARDECL(opus_val16, y);
206 SAVE_STACK;
207
208 celt_assert((ord&3)==0);
209 ALLOC(rden, ord, opus_val16);
210 ALLOC(y, N+ord, opus_val16);
211 for(i=0;i<ord;i++)
212 rden[i] = den[ord-i-1];
213 for(i=0;i<ord;i++)
214 y[i] = -mem[ord-i-1];
215 for(;i<N+ord;i++)
216 y[i]=0;
217 for (i=0;i<N-3;i+=4)
218 {
219 /* Unroll by 4 as if it were an FIR filter */
220 opus_val32 sum[4];
221 sum[0]=_x[i];
222 sum[1]=_x[i+1];
223 sum[2]=_x[i+2];
224 sum[3]=_x[i+3];
225 xcorr_kernel(rden, y+i, sum, ord, arch);
226
227 /* Patch up the result to compensate for the fact that this is an IIR */
228 y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
229 _y[i ] = sum[0];
230 sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
231 y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
232 _y[i+1] = sum[1];
233 sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
234 sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
235 y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
236 _y[i+2] = sum[2];
237
238 sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
239 sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
240 sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
241 y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
242 _y[i+3] = sum[3];
243 }
244 for (;i<N;i++)
245 {
246 opus_val32 sum = _x[i];
247 for (j=0;j<ord;j++)
248 sum -= MULT16_16(rden[j],y[i+j]);
249 y[i+ord] = SROUND16(sum,SIG_SHIFT);
250 _y[i] = sum;
251 }
252 for(i=0;i<ord;i++)
253 mem[i] = _y[N-i-1];
254 RESTORE_STACK;
255 #endif
256 }
257
_celt_autocorr(const opus_val16 * x,opus_val32 * ac,const opus_val16 * window,int overlap,int lag,int n,int arch)258 int _celt_autocorr(
259 const opus_val16 *x, /* in: [0...n-1] samples x */
260 opus_val32 *ac, /* out: [0...lag-1] ac values */
261 const opus_val16 *window,
262 int overlap,
263 int lag,
264 int n,
265 int arch
266 )
267 {
268 opus_val32 d;
269 int i, k;
270 int fastN=n-lag;
271 int shift;
272 const opus_val16 *xptr;
273 VARDECL(opus_val16, xx);
274 SAVE_STACK;
275 ALLOC(xx, n, opus_val16);
276 celt_assert(n>0);
277 celt_assert(overlap>=0);
278 if (overlap == 0)
279 {
280 xptr = x;
281 } else {
282 for (i=0;i<n;i++)
283 xx[i] = x[i];
284 for (i=0;i<overlap;i++)
285 {
286 xx[i] = MULT16_16_Q15(x[i],window[i]);
287 xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
288 }
289 xptr = xx;
290 }
291 shift=0;
292 #ifdef FIXED_POINT
293 {
294 opus_val32 ac0;
295 ac0 = 1+(n<<7);
296 if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
297 for(i=(n&1);i<n;i+=2)
298 {
299 ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
300 ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
301 }
302
303 shift = celt_ilog2(ac0)-30+10;
304 shift = (shift)/2;
305 if (shift>0)
306 {
307 for(i=0;i<n;i++)
308 xx[i] = PSHR32(xptr[i], shift);
309 xptr = xx;
310 } else
311 shift = 0;
312 }
313 #endif
314 celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
315 for (k=0;k<=lag;k++)
316 {
317 for (i = k+fastN, d = 0; i < n; i++)
318 d = MAC16_16(d, xptr[i], xptr[i-k]);
319 ac[k] += d;
320 }
321 #ifdef FIXED_POINT
322 shift = 2*shift;
323 if (shift<=0)
324 ac[0] += SHL32((opus_int32)1, -shift);
325 if (ac[0] < 268435456)
326 {
327 int shift2 = 29 - EC_ILOG(ac[0]);
328 for (i=0;i<=lag;i++)
329 ac[i] = SHL32(ac[i], shift2);
330 shift -= shift2;
331 } else if (ac[0] >= 536870912)
332 {
333 int shift2=1;
334 if (ac[0] >= 1073741824)
335 shift2++;
336 for (i=0;i<=lag;i++)
337 ac[i] = SHR32(ac[i], shift2);
338 shift += shift2;
339 }
340 #endif
341
342 RESTORE_STACK;
343 return shift;
344 }
345