1 /* Copyright (c) 2009-2010 Xiph.Org Foundation
2 Written by Jean-Marc Valin */
3 /*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "celt_lpc.h"
33 #include "stack_alloc.h"
34 #include "mathops.h"
35 #include "pitch.h"
36
_celt_lpc(opus_val16 * _lpc,const opus_val32 * ac,int p)37 void _celt_lpc(
38 opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */
39 const opus_val32 *ac, /* in: [0...p] autocorrelation values */
40 int p
41 )
42 {
43 int i, j;
44 opus_val32 r;
45 opus_val32 error = ac[0];
46 #ifdef FIXED_POINT
47 opus_val32 lpc[LPC_ORDER];
48 #else
49 float *lpc = _lpc;
50 #endif
51
52 OPUS_CLEAR(lpc, p);
53 if (ac[0] != 0)
54 {
55 for (i = 0; i < p; i++) {
56 /* Sum up this iteration's reflection coefficient */
57 opus_val32 rr = 0;
58 for (j = 0; j < i; j++)
59 rr += MULT32_32_Q31(lpc[j],ac[i - j]);
60 rr += SHR32(ac[i + 1],3);
61 r = -frac_div32(SHL32(rr,3), error);
62 /* Update LPC coefficients and total error */
63 lpc[i] = SHR32(r,3);
64 for (j = 0; j < (i+1)>>1; j++)
65 {
66 opus_val32 tmp1, tmp2;
67 tmp1 = lpc[j];
68 tmp2 = lpc[i-1-j];
69 lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2);
70 lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
71 }
72
73 error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
74 /* Bail out once we get 30 dB gain */
75 #ifdef FIXED_POINT
76 if (error<SHR32(ac[0],10))
77 break;
78 #else
79 if (error<.001f*ac[0])
80 break;
81 #endif
82 }
83 }
84 #ifdef FIXED_POINT
85 for (i=0;i<p;i++)
86 _lpc[i] = ROUND16(lpc[i],16);
87 #endif
88 }
89
90
celt_fir_c(const opus_val16 * _x,const opus_val16 * num,opus_val16 * _y,int N,int ord,opus_val16 * mem,int arch)91 void celt_fir_c(
92 const opus_val16 *_x,
93 const opus_val16 *num,
94 opus_val16 *_y,
95 int N,
96 int ord,
97 opus_val16 *mem,
98 int arch)
99 {
100 int i,j;
101 VARDECL(opus_val16, rnum);
102 VARDECL(opus_val16, x);
103 SAVE_STACK;
104
105 ALLOC(rnum, ord, opus_val16);
106 ALLOC(x, N+ord, opus_val16);
107 for(i=0;i<ord;i++)
108 rnum[i] = num[ord-i-1];
109 for(i=0;i<ord;i++)
110 x[i] = mem[ord-i-1];
111 for (i=0;i<N;i++)
112 x[i+ord]=_x[i];
113 for(i=0;i<ord;i++)
114 mem[i] = _x[N-i-1];
115 #ifdef SMALL_FOOTPRINT
116 (void)arch;
117 for (i=0;i<N;i++)
118 {
119 opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
120 for (j=0;j<ord;j++)
121 {
122 sum = MAC16_16(sum,rnum[j],x[i+j]);
123 }
124 _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
125 }
126 #else
127 for (i=0;i<N-3;i+=4)
128 {
129 opus_val32 sum[4]={0,0,0,0};
130 xcorr_kernel(rnum, x+i, sum, ord, arch);
131 _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
132 _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
133 _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
134 _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
135 }
136 for (;i<N;i++)
137 {
138 opus_val32 sum = 0;
139 for (j=0;j<ord;j++)
140 sum = MAC16_16(sum,rnum[j],x[i+j]);
141 _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
142 }
143 #endif
144 RESTORE_STACK;
145 }
146
celt_iir(const opus_val32 * _x,const opus_val16 * den,opus_val32 * _y,int N,int ord,opus_val16 * mem,int arch)147 void celt_iir(const opus_val32 *_x,
148 const opus_val16 *den,
149 opus_val32 *_y,
150 int N,
151 int ord,
152 opus_val16 *mem,
153 int arch)
154 {
155 #ifdef SMALL_FOOTPRINT
156 int i,j;
157 (void)arch;
158 for (i=0;i<N;i++)
159 {
160 opus_val32 sum = _x[i];
161 for (j=0;j<ord;j++)
162 {
163 sum -= MULT16_16(den[j],mem[j]);
164 }
165 for (j=ord-1;j>=1;j--)
166 {
167 mem[j]=mem[j-1];
168 }
169 mem[0] = ROUND16(sum,SIG_SHIFT);
170 _y[i] = sum;
171 }
172 #else
173 int i,j;
174 VARDECL(opus_val16, rden);
175 VARDECL(opus_val16, y);
176 SAVE_STACK;
177
178 celt_assert((ord&3)==0);
179 ALLOC(rden, ord, opus_val16);
180 ALLOC(y, N+ord, opus_val16);
181 for(i=0;i<ord;i++)
182 rden[i] = den[ord-i-1];
183 for(i=0;i<ord;i++)
184 y[i] = -mem[ord-i-1];
185 for(;i<N+ord;i++)
186 y[i]=0;
187 for (i=0;i<N-3;i+=4)
188 {
189 /* Unroll by 4 as if it were an FIR filter */
190 opus_val32 sum[4];
191 sum[0]=_x[i];
192 sum[1]=_x[i+1];
193 sum[2]=_x[i+2];
194 sum[3]=_x[i+3];
195 xcorr_kernel(rden, y+i, sum, ord, arch);
196
197 /* Patch up the result to compensate for the fact that this is an IIR */
198 y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT);
199 _y[i ] = sum[0];
200 sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
201 y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT);
202 _y[i+1] = sum[1];
203 sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
204 sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
205 y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT);
206 _y[i+2] = sum[2];
207
208 sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
209 sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
210 sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
211 y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT);
212 _y[i+3] = sum[3];
213 }
214 for (;i<N;i++)
215 {
216 opus_val32 sum = _x[i];
217 for (j=0;j<ord;j++)
218 sum -= MULT16_16(rden[j],y[i+j]);
219 y[i+ord] = ROUND16(sum,SIG_SHIFT);
220 _y[i] = sum;
221 }
222 for(i=0;i<ord;i++)
223 mem[i] = _y[N-i-1];
224 RESTORE_STACK;
225 #endif
226 }
227
_celt_autocorr(const opus_val16 * x,opus_val32 * ac,const opus_val16 * window,int overlap,int lag,int n,int arch)228 int _celt_autocorr(
229 const opus_val16 *x, /* in: [0...n-1] samples x */
230 opus_val32 *ac, /* out: [0...lag-1] ac values */
231 const opus_val16 *window,
232 int overlap,
233 int lag,
234 int n,
235 int arch
236 )
237 {
238 opus_val32 d;
239 int i, k;
240 int fastN=n-lag;
241 int shift;
242 const opus_val16 *xptr;
243 VARDECL(opus_val16, xx);
244 SAVE_STACK;
245 ALLOC(xx, n, opus_val16);
246 celt_assert(n>0);
247 celt_assert(overlap>=0);
248 if (overlap == 0)
249 {
250 xptr = x;
251 } else {
252 for (i=0;i<n;i++)
253 xx[i] = x[i];
254 for (i=0;i<overlap;i++)
255 {
256 xx[i] = MULT16_16_Q15(x[i],window[i]);
257 xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
258 }
259 xptr = xx;
260 }
261 shift=0;
262 #ifdef FIXED_POINT
263 {
264 opus_val32 ac0;
265 ac0 = 1+(n<<7);
266 if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
267 for(i=(n&1);i<n;i+=2)
268 {
269 ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
270 ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
271 }
272
273 shift = celt_ilog2(ac0)-30+10;
274 shift = (shift)/2;
275 if (shift>0)
276 {
277 for(i=0;i<n;i++)
278 xx[i] = PSHR32(xptr[i], shift);
279 xptr = xx;
280 } else
281 shift = 0;
282 }
283 #endif
284 celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
285 for (k=0;k<=lag;k++)
286 {
287 for (i = k+fastN, d = 0; i < n; i++)
288 d = MAC16_16(d, xptr[i], xptr[i-k]);
289 ac[k] += d;
290 }
291 #ifdef FIXED_POINT
292 shift = 2*shift;
293 if (shift<=0)
294 ac[0] += SHL32((opus_int32)1, -shift);
295 if (ac[0] < 268435456)
296 {
297 int shift2 = 29 - EC_ILOG(ac[0]);
298 for (i=0;i<=lag;i++)
299 ac[i] = SHL32(ac[i], shift2);
300 shift -= shift2;
301 } else if (ac[0] >= 536870912)
302 {
303 int shift2=1;
304 if (ac[0] >= 1073741824)
305 shift2++;
306 for (i=0;i<=lag;i++)
307 ac[i] = SHR32(ac[i], shift2);
308 shift += shift2;
309 }
310 #endif
311
312 RESTORE_STACK;
313 return shift;
314 }
315