1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 /*
12 * lattice.c
13 *
14 * Contains the normalized lattice filter routines (MA and AR) for iSAC codec
15 *
16 */
17
18 #include "modules/audio_coding/codecs/isac/fix/source/codec.h"
19 #include "modules/audio_coding/codecs/isac/fix/source/settings.h"
20 #include "rtc_base/sanitizer.h"
21
/* LATTICE_MUL_32_32_RSFT16(a32a, a32b, b32) expands to
     WEBRTC_SPL_MUL(a32a, b32) + WEBRTC_SPL_MUL_16_32_RSFT16(a32b, b32)
   converted to int32_t. The callers in this file pass the two 16-bit halves
   of a 32-bit factor as a32a (high part, incremented by one when the low
   part is negative) and a32b (low part), so that the result approximates
   (32-bit factor * b32) >> 16.

   This macro is FORBIDDEN to use elsewhere than in a function in this file and
   its corresponding neon version. It might give unpredictable results, since a
   general int32_t*int32_t multiplication results in a 64 bit value.
   The result is then shifted just 16 steps to the right, which requires 48
   bits, i.e. in the general case it will NOT fit in an int32_t. In the
   cases used in here, an int32_t will be enough, since (for a good
   reason) the involved multiplicands aren't big enough to overflow an
   int32_t after shifting right 16 bits. I have compared the result of a
   multiplication between t32 and tmp32, done in two ways:
   1) Using (int32_t) (((float)(tmp32))*((float)(tmp32b))/65536.0);
   2) Using LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
   By running 25 files, I haven't found any bigger diff than 64 - this was in the
   case when method 1) gave 650235648 and 2) gave 650235712.
*/
#define LATTICE_MUL_32_32_RSFT16(a32a, a32b, b32)  \
  ((int32_t)(WEBRTC_SPL_MUL((a32a), (b32)) +       \
             (WEBRTC_SPL_MUL_16_32_RSFT16((a32b), (b32)))))
38
/* Prototype of the AR filter loop: filters ar_g_Q0[] and ar_f_Q0[] through an
   AR filter whose coefficients are cth_Q15[] and sth_Q15[]; order_coef is the
   number of coefficients. The definition lives outside this file, with
   implementations for both generic and ARMv7 platforms.
*/
void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0, int16_t* ar_f_Q0,
                                int16_t* cth_Q15, int16_t* sth_Q15,
                                size_t order_coef);
48
49 /* Inner loop used for function WebRtcIsacfix_NormLatticeFilterMa(). It does:
50 for 0 <= n < HALF_SUBFRAMELEN - 1:
51 *ptr2 = input2 * (*ptr2) + input0 * (*ptr0));
52 *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
53 Note, function WebRtcIsacfix_FilterMaLoopNeon and WebRtcIsacfix_FilterMaLoopC
54 are not bit-exact. The accuracy by the ARM Neon function is same or better.
55 */
WebRtcIsacfix_FilterMaLoopC(int16_t input0,int16_t input1,int32_t input2,int32_t * ptr0,int32_t * ptr1,int32_t * ptr2)56 void WebRtcIsacfix_FilterMaLoopC(int16_t input0, // Filter coefficient
57 int16_t input1, // Filter coefficient
58 int32_t input2, // Inverse coeff. (1/input1)
59 int32_t* ptr0, // Sample buffer
60 int32_t* ptr1, // Sample buffer
61 int32_t* ptr2) { // Sample buffer
62 int n = 0;
63
64 // Separate the 32-bit variable input2 into two 16-bit integers (high 16 and
65 // low 16 bits), for using LATTICE_MUL_32_32_RSFT16 in the loop.
66 int16_t t16a = (int16_t)(input2 >> 16);
67 int16_t t16b = (int16_t)input2;
68 if (t16b < 0) t16a++;
69
70 // The loop filtering the samples *ptr0, *ptr1, *ptr2 with filter coefficients
71 // input0, input1, and input2.
72 for(n = 0; n < HALF_SUBFRAMELEN - 1; n++, ptr0++, ptr1++, ptr2++) {
73 int32_t tmp32a = 0;
74 int32_t tmp32b = 0;
75
76 // Calculate *ptr2 = input2 * (*ptr2 + input0 * (*ptr0));
77 tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr0); // Q15 * Q15 >> 15 = Q15
78 tmp32b = *ptr2 + tmp32a; // Q15 + Q15 = Q15
79 *ptr2 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
80
81 // Calculate *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
82 tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input1, *ptr0); // Q15*Q15>>15 = Q15
83 tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr2); // Q15*Q15>>15 = Q15
84 *ptr1 = tmp32a + tmp32b; // Q15 + Q15 = Q15
85 }
86 }
87
88 /* filter the signal using normalized lattice filter */
89 /* MA filter */
WebRtcIsacfix_NormLatticeFilterMa(size_t orderCoef,int32_t * stateGQ15,int16_t * lat_inQ0,int16_t * filt_coefQ15,int32_t * gain_lo_hiQ17,int16_t lo_hi,int16_t * lat_outQ9)90 void WebRtcIsacfix_NormLatticeFilterMa(size_t orderCoef,
91 int32_t *stateGQ15,
92 int16_t *lat_inQ0,
93 int16_t *filt_coefQ15,
94 int32_t *gain_lo_hiQ17,
95 int16_t lo_hi,
96 int16_t *lat_outQ9)
97 {
98 int16_t sthQ15[MAX_AR_MODEL_ORDER];
99 int16_t cthQ15[MAX_AR_MODEL_ORDER];
100
101 int u, n;
102 size_t i, k;
103 int16_t temp2,temp3;
104 size_t ord_1 = orderCoef+1;
105 int32_t inv_cthQ16[MAX_AR_MODEL_ORDER];
106
107 int32_t gain32, fQtmp;
108 int16_t gain16;
109 int16_t gain_sh;
110
111 int32_t tmp32, tmp32b;
112 int32_t fQ15vec[HALF_SUBFRAMELEN];
113 int32_t gQ15[MAX_AR_MODEL_ORDER+1][HALF_SUBFRAMELEN];
114 int16_t sh;
115 int16_t t16a;
116 int16_t t16b;
117
118 for (u=0;u<SUBFRAMES;u++)
119 {
120 int32_t temp1 = u * HALF_SUBFRAMELEN;
121
122 /* set the Direct Form coefficients */
123 temp2 = (int16_t)(u * orderCoef);
124 temp3 = (int16_t)(2 * u + lo_hi);
125
126 /* compute lattice filter coefficients */
127 memcpy(sthQ15, &filt_coefQ15[temp2], orderCoef * sizeof(int16_t));
128
129 WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
130
131 /* compute the gain */
132 gain32 = gain_lo_hiQ17[temp3];
133 gain_sh = WebRtcSpl_NormW32(gain32);
134 gain32 <<= gain_sh; // Q(17+gain_sh)
135
136 for (k=0;k<orderCoef;k++)
137 {
138 gain32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], gain32); //Q15*Q(17+gain_sh)>>15 = Q(17+gain_sh)
139 inv_cthQ16[k] = WebRtcSpl_DivW32W16((int32_t)2147483647, cthQ15[k]); // 1/cth[k] in Q31/Q15 = Q16
140 }
141 gain16 = (int16_t)(gain32 >> 16); // Q(1+gain_sh).
142
143 /* normalized lattice filter */
144 /*****************************/
145
146 /* initial conditions */
147 for (i=0;i<HALF_SUBFRAMELEN;i++)
148 {
149 fQ15vec[i] = lat_inQ0[i + temp1] << 15; // Q15
150 gQ15[0][i] = lat_inQ0[i + temp1] << 15; // Q15
151 }
152
153
154 fQtmp = fQ15vec[0];
155
156 /* get the state of f&g for the first input, for all orders */
157 for (i=1;i<ord_1;i++)
158 {
159 // Calculate f[i][0] = inv_cth[i-1]*(f[i-1][0] + sth[i-1]*stateG[i-1]);
160 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], stateGQ15[i-1]);//Q15*Q15>>15 = Q15
161 tmp32b= fQtmp + tmp32; //Q15+Q15=Q15
162 tmp32 = inv_cthQ16[i-1]; //Q16
163 t16a = (int16_t)(tmp32 >> 16);
164 t16b = (int16_t)(tmp32 - (t16a << 16));
165 if (t16b<0) t16a++;
166 tmp32 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
167 fQtmp = tmp32; // Q15
168
169 // Calculate g[i][0] = cth[i-1]*stateG[i-1] + sth[i-1]* f[i][0];
170 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[i-1], stateGQ15[i-1]); //Q15*Q15>>15 = Q15
171 tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], fQtmp); //Q15*Q15>>15 = Q15
172 tmp32 = tmp32 + tmp32b;//Q15+Q15 = Q15
173 gQ15[i][0] = tmp32; // Q15
174 }
175
176 /* filtering */
177 /* save the states */
178 for(k=0;k<orderCoef;k++)
179 {
180 // for 0 <= n < HALF_SUBFRAMELEN - 1:
181 // f[k+1][n+1] = inv_cth[k]*(f[k][n+1] + sth[k]*g[k][n]);
182 // g[k+1][n+1] = cth[k]*g[k][n] + sth[k]* f[k+1][n+1];
183 WebRtcIsacfix_FilterMaLoopFix(sthQ15[k], cthQ15[k], inv_cthQ16[k],
184 &gQ15[k][0], &gQ15[k+1][1], &fQ15vec[1]);
185 }
186
187 fQ15vec[0] = fQtmp;
188
189 for(n=0;n<HALF_SUBFRAMELEN;n++)
190 {
191 //gain32 >>= gain_sh; // Q(17+gain_sh) -> Q17
192 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(gain16, fQ15vec[n]); //Q(1+gain_sh)*Q15>>16 = Q(gain_sh)
193 sh = 9-gain_sh; //number of needed shifts to reach Q9
194 t16a = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh);
195 lat_outQ9[n + temp1] = t16a;
196 }
197
198 /* save the states */
199 for (i=0;i<ord_1;i++)
200 {
201 stateGQ15[i] = gQ15[i][HALF_SUBFRAMELEN-1];
202 }
203 //process next frame
204 }
205
206 return;
207 }
208
// Left shift of an int32_t that's allowed to overflow: bits shifted out of the
// top are discarded and the result wraps around. The shift is carried out on
// the unsigned representation, which is well-defined for 0 <= shift < 32 (a
// signed left shift that overflows, or that has a negative operand, is
// undefined behavior). Converting the result back to int32_t is
// implementation-defined rather than undefined; on two's complement targets it
// yields the same bit pattern the old signed shift produced in practice, so no
// sanitizer suppression is required any more.
static inline int32_t OverflowingLShiftS32(int32_t x, int shift) {
  return (int32_t)((uint32_t)x << shift);
}
216
217 /* ----------------AR filter-------------------------*/
218 /* filter the signal using normalized lattice filter */
WebRtcIsacfix_NormLatticeFilterAr(size_t orderCoef,int16_t * stateGQ0,int32_t * lat_inQ25,int16_t * filt_coefQ15,int32_t * gain_lo_hiQ17,int16_t lo_hi,int16_t * lat_outQ0)219 void WebRtcIsacfix_NormLatticeFilterAr(size_t orderCoef,
220 int16_t *stateGQ0,
221 int32_t *lat_inQ25,
222 int16_t *filt_coefQ15,
223 int32_t *gain_lo_hiQ17,
224 int16_t lo_hi,
225 int16_t *lat_outQ0)
226 {
227 size_t ii, k, i;
228 int n, u;
229 int16_t sthQ15[MAX_AR_MODEL_ORDER];
230 int16_t cthQ15[MAX_AR_MODEL_ORDER];
231 int32_t tmp32;
232
233
234 int16_t tmpAR;
235 int16_t ARfQ0vec[HALF_SUBFRAMELEN];
236 int16_t ARgQ0vec[MAX_AR_MODEL_ORDER+1];
237
238 int32_t inv_gain32;
239 int16_t inv_gain16;
240 int16_t den16;
241 int16_t sh;
242
243 int16_t temp2,temp3;
244 size_t ord_1 = orderCoef+1;
245
246 for (u=0;u<SUBFRAMES;u++)
247 {
248 int32_t temp1 = u * HALF_SUBFRAMELEN;
249
250 //set the denominator and numerator of the Direct Form
251 temp2 = (int16_t)(u * orderCoef);
252 temp3 = (int16_t)(2 * u + lo_hi);
253
254 for (ii=0; ii<orderCoef; ii++) {
255 sthQ15[ii] = filt_coefQ15[temp2+ii];
256 }
257
258 WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
259
260 // Originally, this line was assumed to never overflow, since "[s]imulation
261 // of the 25 files shows that maximum value in the vector gain_lo_hiQ17[]
262 // is 441344, which means that it is log2((2^31)/441344) = 12.2 shifting
263 // bits from saturation. Therefore, it should be safe to use Q27 instead of
264 // Q17." However, a fuzzer test succeeded in provoking an overflow here,
265 // which we ignore on the theory that only "abnormal" inputs cause
266 // overflow.
267 tmp32 = OverflowingLShiftS32(gain_lo_hiQ17[temp3], 10); // Q27
268
269 for (k=0;k<orderCoef;k++) {
270 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], tmp32); // Q15*Q27>>15 = Q27
271 }
272
273 sh = WebRtcSpl_NormW32(tmp32); // tmp32 is the gain
274 den16 = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh-16); //Q(27+sh-16) = Q(sh+11) (all 16 bits are value bits)
275 inv_gain32 = WebRtcSpl_DivW32W16((int32_t)2147483647, den16); // 1/gain in Q31/Q(sh+11) = Q(20-sh)
276
277 //initial conditions
278 inv_gain16 = (int16_t)(inv_gain32 >> 2); // 1/gain in Q(20-sh-2) = Q(18-sh)
279
280 for (i=0;i<HALF_SUBFRAMELEN;i++)
281 {
282 tmp32 = OverflowingLShiftS32(lat_inQ25[i + temp1], 1); // Q25->Q26
283 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh)
284 tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0
285
286 ARfQ0vec[i] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
287 }
288
289 // Get the state of f & g for the first input, for all orders.
290 for (i = orderCoef; i > 0; i--)
291 {
292 tmp32 = (cthQ15[i - 1] * ARfQ0vec[0] - sthQ15[i - 1] * stateGQ0[i - 1] +
293 16384) >> 15;
294 tmpAR = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
295
296 tmp32 = (sthQ15[i - 1] * ARfQ0vec[0] + cthQ15[i - 1] * stateGQ0[i - 1] +
297 16384) >> 15;
298 ARgQ0vec[i] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
299 ARfQ0vec[0] = tmpAR;
300 }
301 ARgQ0vec[0] = ARfQ0vec[0];
302
303 // Filter ARgQ0vec[] and ARfQ0vec[] through coefficients cthQ15[] and sthQ15[].
304 WebRtcIsacfix_FilterArLoop(ARgQ0vec, ARfQ0vec, cthQ15, sthQ15, orderCoef);
305
306 for(n=0;n<HALF_SUBFRAMELEN;n++)
307 {
308 lat_outQ0[n + temp1] = ARfQ0vec[n];
309 }
310
311
312 /* cannot use memcpy in the following */
313
314 for (i=0;i<ord_1;i++)
315 {
316 stateGQ0[i] = ARgQ0vec[i];
317 }
318 }
319
320 return;
321 }
322