1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "pitch_estimator.h"
12
13 #include <math.h>
14 #include <memory.h>
15 #include <string.h>
16 #ifdef WEBRTC_ANDROID
17 #include <stdlib.h>
18 #endif
19
20 static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160,
21 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640};
22
23 /* interpolation filter */
IntrepolFilter(double * data_ptr,double * intrp)24 __inline static void IntrepolFilter(double *data_ptr, double *intrp)
25 {
26 *intrp = kInterpolWin[0] * data_ptr[-3];
27 *intrp += kInterpolWin[1] * data_ptr[-2];
28 *intrp += kInterpolWin[2] * data_ptr[-1];
29 *intrp += kInterpolWin[3] * data_ptr[0];
30 *intrp += kInterpolWin[4] * data_ptr[1];
31 *intrp += kInterpolWin[5] * data_ptr[2];
32 *intrp += kInterpolWin[6] * data_ptr[3];
33 *intrp += kInterpolWin[7] * data_ptr[4];
34 }
35
36
37 /* 2D parabolic interpolation */
38 /* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */
Intrpol2D(double T[3][3],double * x,double * y,double * peak_val)39 __inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val)
40 {
41 double c, b[2], A[2][2];
42 double t1, t2, d;
43 double delta1, delta2;
44
45
46 // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
47 // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
48
49 c = T[1][1];
50 b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]);
51 b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]);
52 A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]);
53 t1 = 0.5 * (T[0][0] + T[2][2]) - c;
54 t2 = 0.5 * (T[2][0] + T[0][2]) - c;
55 d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2;
56 A[0][0] = -t1 - 0.5 * d;
57 A[1][1] = -t2 - 0.5 * d;
58
59 /* deal with singularities or ill-conditioned cases */
60 if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) {
61 *peak_val = T[1][1];
62 return;
63 }
64
65 /* Cholesky decomposition: replace A by upper-triangular factor */
66 A[0][0] = sqrt(A[0][0]);
67 A[0][1] = A[0][1] / A[0][0];
68 A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]);
69
70 /* compute [x; y] = -0.5 * inv(A) * b */
71 t1 = b[0] / A[0][0];
72 t2 = (b[1] - t1 * A[0][1]) / A[1][1];
73 delta2 = t2 / A[1][1];
74 delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0];
75 delta2 *= 0.5;
76
77 /* limit norm */
78 t1 = delta1 * delta1 + delta2 * delta2;
79 if (t1 > 1.0) {
80 delta1 /= t1;
81 delta2 /= t1;
82 }
83
84 *peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c;
85
86 *x += delta1;
87 *y += delta2;
88 }
89
90
PCorr(const double * in,double * outcorr)91 static void PCorr(const double *in, double *outcorr)
92 {
93 double sum, ysum, prod;
94 const double *x, *inptr;
95 int k, n;
96
97 //ysum = 1e-6; /* use this with float (i.s.o. double)! */
98 ysum = 1e-13;
99 sum = 0.0;
100 x = in + PITCH_MAX_LAG/2 + 2;
101 for (n = 0; n < PITCH_CORR_LEN2; n++) {
102 ysum += in[n] * in[n];
103 sum += x[n] * in[n];
104 }
105
106 outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */
107 *outcorr = sum / sqrt(ysum);
108
109 for (k = 1; k < PITCH_LAG_SPAN2; k++) {
110 ysum -= in[k-1] * in[k-1];
111 ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1];
112 sum = 0.0;
113 inptr = &in[k];
114 prod = x[0] * inptr[0];
115 for (n = 1; n < PITCH_CORR_LEN2; n++) {
116 sum += prod;
117 prod = x[n] * inptr[n];
118 }
119 sum += prod;
120 outcorr--;
121 *outcorr = sum / sqrt(ysum);
122 }
123 }
124
125
WebRtcIsac_InitializePitch(const double * in,const double old_lag,const double old_gain,PitchAnalysisStruct * State,double * lags)126 void WebRtcIsac_InitializePitch(const double *in,
127 const double old_lag,
128 const double old_gain,
129 PitchAnalysisStruct *State,
130 double *lags)
131 {
132 double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2];
133 double ratio, log_lag, gain_bias;
134 double bias;
135 double corrvec1[PITCH_LAG_SPAN2];
136 double corrvec2[PITCH_LAG_SPAN2];
137 int m, k;
138 // Allocating 10 extra entries at the begining of the CorrSurf
139 double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)];
140 double* CorrSurf[2*PITCH_BW+3];
141 double *CorrSurfPtr1, *CorrSurfPtr2;
142 double LagWin[3] = {0.2, 0.5, 0.98};
143 int ind1, ind2, peaks_ind, peak, max_ind;
144 int peaks[PITCH_MAX_NUM_PEAKS];
145 double adj, gain_tmp;
146 double corr, corr_max;
147 double intrp_a, intrp_b, intrp_c, intrp_d;
148 double peak_vals[PITCH_MAX_NUM_PEAKS];
149 double lags1[PITCH_MAX_NUM_PEAKS];
150 double lags2[PITCH_MAX_NUM_PEAKS];
151 double T[3][3];
152 int row;
153
154 for(k = 0; k < 2*PITCH_BW+3; k++)
155 {
156 CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)];
157 }
158 /* reset CorrSurf matrix */
159 memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4)));
160
161 //warnings -DH
162 max_ind = 0;
163 peak = 0;
164
165 /* copy old values from state buffer */
166 memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
167
168 /* decimation; put result after the old values */
169 WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN,
170 &buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]);
171
172 /* low-pass filtering */
173 for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++)
174 buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2];
175
176 /* copy end part back into state buffer */
177 memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
178
179 /* compute correlation for first and second half of the frame */
180 PCorr(buf_dec, corrvec1);
181 PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2);
182
183 /* bias towards pitch lag of previous frame */
184 log_lag = log(0.5 * old_lag);
185 gain_bias = 4.0 * old_gain * old_gain;
186 if (gain_bias > 0.8) gain_bias = 0.8;
187 for (k = 0; k < PITCH_LAG_SPAN2; k++)
188 {
189 ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag;
190 bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
191 corrvec1[k] *= bias;
192 }
193
194 /* taper correlation functions */
195 for (k = 0; k < 3; k++) {
196 gain_tmp = LagWin[k];
197 corrvec1[k] *= gain_tmp;
198 corrvec2[k] *= gain_tmp;
199 corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
200 corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
201 }
202
203 corr_max = 0.0;
204 /* fill middle row of correlation surface */
205 ind1 = 0;
206 ind2 = 0;
207 CorrSurfPtr1 = &CorrSurf[PITCH_BW][2];
208 for (k = 0; k < PITCH_LAG_SPAN2; k++) {
209 corr = corrvec1[ind1++] + corrvec2[ind2++];
210 CorrSurfPtr1[k] = corr;
211 if (corr > corr_max) {
212 corr_max = corr; /* update maximum */
213 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
214 }
215 }
216 /* fill first and last rows of correlation surface */
217 ind1 = 0;
218 ind2 = PITCH_BW;
219 CorrSurfPtr1 = &CorrSurf[0][2];
220 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2];
221 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) {
222 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
223 adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
224 corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
225 CorrSurfPtr1[k] = corr;
226 if (corr > corr_max) {
227 corr_max = corr; /* update maximum */
228 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
229 }
230 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
231 CorrSurfPtr2[k] = corr;
232 if (corr > corr_max) {
233 corr_max = corr; /* update maximum */
234 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
235 }
236 }
237 /* fill second and next to last rows of correlation surface */
238 ind1 = 0;
239 ind2 = PITCH_BW-1;
240 CorrSurfPtr1 = &CorrSurf[1][2];
241 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1];
242 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) {
243 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
244 adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
245 corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
246 CorrSurfPtr1[k] = corr;
247 if (corr > corr_max) {
248 corr_max = corr; /* update maximum */
249 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
250 }
251 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
252 CorrSurfPtr2[k] = corr;
253 if (corr > corr_max) {
254 corr_max = corr; /* update maximum */
255 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
256 }
257 }
258 /* fill remainder of correlation surface */
259 for (m = 2; m < PITCH_BW; m++) {
260 ind1 = 0;
261 ind2 = PITCH_BW - m; /* always larger than ind1 */
262 CorrSurfPtr1 = &CorrSurf[m][2];
263 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m];
264 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) {
265 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
266 adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
267 corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
268 CorrSurfPtr1[k] = corr;
269 if (corr > corr_max) {
270 corr_max = corr; /* update maximum */
271 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
272 }
273 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
274 CorrSurfPtr2[k] = corr;
275 if (corr > corr_max) {
276 corr_max = corr; /* update maximum */
277 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
278 }
279 }
280 }
281
282 /* threshold value to qualify as a peak */
283 corr_max *= 0.6;
284
285 peaks_ind = 0;
286 /* find peaks */
287 for (m = 1; m < PITCH_BW+1; m++) {
288 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
289 CorrSurfPtr1 = &CorrSurf[m][2];
290 for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) {
291 corr = CorrSurfPtr1[k];
292 if (corr > corr_max) {
293 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
294 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
295 /* found a peak; store index into matrix */
296 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
297 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
298 }
299 }
300 }
301 }
302 }
303 for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) {
304 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
305 CorrSurfPtr1 = &CorrSurf[m][2];
306 for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) {
307 corr = CorrSurfPtr1[k];
308 if (corr > corr_max) {
309 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
310 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
311 /* found a peak; store index into matrix */
312 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
313 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
314 }
315 }
316 }
317 }
318 }
319
320 if (peaks_ind > 0) {
321 /* examine each peak */
322 CorrSurfPtr1 = &CorrSurf[0][0];
323 for (k = 0; k < peaks_ind; k++) {
324 peak = peaks[k];
325
326 /* compute four interpolated values around current peak */
327 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a);
328 IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b);
329 IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c);
330 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d);
331
332 /* determine maximum of the interpolated values */
333 corr = CorrSurfPtr1[peak];
334 corr_max = intrp_a;
335 if (intrp_b > corr_max) corr_max = intrp_b;
336 if (intrp_c > corr_max) corr_max = intrp_c;
337 if (intrp_d > corr_max) corr_max = intrp_d;
338
339 /* determine where the peak sits and fill a 3x3 matrix around it */
340 row = peak / (PITCH_LAG_SPAN2+4);
341 lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
342 lags2[k] = (double) (lags1[k] + PITCH_BW - row);
343 if ( corr > corr_max ) {
344 T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
345 T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
346 T[1][1] = corr;
347 T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
348 T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
349 T[1][0] = intrp_a;
350 T[0][1] = intrp_b;
351 T[2][1] = intrp_c;
352 T[1][2] = intrp_d;
353 } else {
354 if (intrp_a == corr_max) {
355 lags1[k] -= 0.5;
356 lags2[k] += 0.5;
357 IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]);
358 IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]);
359 T[1][1] = intrp_a;
360 T[0][2] = intrp_b;
361 T[2][2] = intrp_c;
362 T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)];
363 T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
364 T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
365 T[1][2] = corr;
366 } else if (intrp_b == corr_max) {
367 lags1[k] -= 0.5;
368 lags2[k] -= 0.5;
369 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]);
370 T[2][0] = intrp_a;
371 T[1][1] = intrp_b;
372 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]);
373 T[2][2] = intrp_d;
374 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
375 T[0][1] = CorrSurfPtr1[peak - 1];
376 T[2][1] = corr;
377 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
378 } else if (intrp_c == corr_max) {
379 lags1[k] += 0.5;
380 lags2[k] += 0.5;
381 T[0][0] = intrp_a;
382 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]);
383 T[1][1] = intrp_c;
384 T[0][2] = intrp_d;
385 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]);
386 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
387 T[0][1] = corr;
388 T[2][1] = CorrSurfPtr1[peak + 1];
389 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
390 } else {
391 lags1[k] += 0.5;
392 lags2[k] -= 0.5;
393 T[0][0] = intrp_b;
394 T[2][0] = intrp_c;
395 T[1][1] = intrp_d;
396 IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]);
397 IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]);
398 T[1][0] = corr;
399 T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
400 T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
401 T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)];
402 }
403 }
404
405 /* 2D parabolic interpolation gives more accurate lags and peak value */
406 Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]);
407 }
408
409 /* determine the highest peak, after applying a bias towards short lags */
410 corr_max = 0.0;
411 for (k = 0; k < peaks_ind; k++) {
412 corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k]));
413 if (corr > corr_max) {
414 corr_max = corr;
415 peak = k;
416 }
417 }
418
419 lags1[peak] *= 2.0;
420 lags2[peak] *= 2.0;
421
422 if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG;
423 if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG;
424 if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG;
425 if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG;
426
427 /* store lags of highest peak in output array */
428 lags[0] = lags1[peak];
429 lags[1] = lags1[peak];
430 lags[2] = lags2[peak];
431 lags[3] = lags2[peak];
432 }
433 else
434 {
435 row = max_ind / (PITCH_LAG_SPAN2+4);
436 lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
437 lags2[0] = (double) (lags1[0] + PITCH_BW - row);
438
439 if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG;
440 if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG;
441 if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG;
442 if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG;
443
444 /* store lags of highest peak in output array */
445 lags[0] = lags1[0];
446 lags[1] = lags1[0];
447 lags[2] = lags2[0];
448 lags[3] = lags2[0];
449 }
450 }
451
452
453
454 /* create weighting matrix by orthogonalizing a basis of polynomials of increasing order
455 * t = (0:4)';
456 * A = [t.^0, t.^1, t.^2, t.^3, t.^4];
457 * [Q, dummy] = qr(A);
458 * P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */
459 static const double kWeight[5][5] = {
460 { 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286},
461 {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857},
462 {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286},
463 { 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143},
464 { 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286}
465 };
466
467
WebRtcIsac_PitchAnalysis(const double * in,double * out,PitchAnalysisStruct * State,double * lags,double * gains)468 void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */
469 double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
470 PitchAnalysisStruct *State,
471 double *lags,
472 double *gains)
473 {
474 double HPin[PITCH_FRAME_LEN];
475 double Weighted[PITCH_FRAME_LEN];
476 double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
477 double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
478 double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead
479 double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
480 double old_lag, old_gain;
481 double nrg_wht, tmp;
482 double Wnrg, Wfluct, Wgain;
483 double H[4][4];
484 double grad[4];
485 double dG[4];
486 int k, m, n, iter;
487
488 /* high pass filtering using second order pole-zero filter */
489 WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
490
491 /* copy from state into buffer */
492 memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
493
494 /* compute weighted and whitened signals */
495 WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
496
497 /* copy from buffer into state */
498 memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
499
500 old_lag = State->PFstr_wght.oldlagp[0];
501 old_gain = State->PFstr_wght.oldgainp[0];
502
503 /* inital pitch estimate */
504 WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
505
506
507 /* Iterative optimization of lags - to be done */
508
509 /* compute energy of whitened signal */
510 nrg_wht = 0.0;
511 for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
512 nrg_wht += Whitened[k] * Whitened[k];
513
514
515 /* Iterative optimization of gains */
516
517 /* set weights for energy, gain fluctiation, and spectral gain penalty functions */
518 Wnrg = 1.0 / nrg_wht;
519 Wgain = 0.005;
520 Wfluct = 3.0;
521
522 /* set initial gains */
523 for (k = 0; k < 4; k++)
524 gains[k] = PITCH_MAX_GAIN_06;
525
526 /* two iterations should be enough */
527 for (iter = 0; iter < 2; iter++) {
528 /* compute Jacobian of pre-filter output towards gains */
529 WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
530
531 /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
532 for (k = 0; k < 4; k++) {
533 tmp = 0.0;
534 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
535 tmp += out_G[n] * out_dG[k][n];
536 grad[k] = tmp * Wnrg;
537 }
538 for (k = 0; k < 4; k++) {
539 for (m = 0; m <= k; m++) {
540 tmp = 0.0;
541 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
542 tmp += out_dG[m][n] * out_dG[k][n];
543 H[k][m] = tmp * Wnrg;
544 }
545 }
546
547 /* add gradient and Hessian (lower triangle) for dampening fast gain changes */
548 for (k = 0; k < 4; k++) {
549 tmp = kWeight[k+1][0] * old_gain;
550 for (m = 0; m < 4; m++)
551 tmp += kWeight[k+1][m+1] * gains[m];
552 grad[k] += tmp * Wfluct;
553 }
554 for (k = 0; k < 4; k++) {
555 for (m = 0; m <= k; m++) {
556 H[k][m] += kWeight[k+1][m+1] * Wfluct;
557 }
558 }
559
560 /* add gradient and Hessian for dampening gain */
561 for (k = 0; k < 3; k++) {
562 tmp = 1.0 / (1 - gains[k]);
563 grad[k] += tmp * tmp * Wgain;
564 H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
565 }
566 tmp = 1.0 / (1 - gains[3]);
567 grad[3] += 1.33 * (tmp * tmp * Wgain);
568 H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
569
570
571 /* compute Cholesky factorization of Hessian
572 * by overwritting the upper triangle; scale factors on diagonal
573 * (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */
574 H[0][1] = H[1][0] / H[0][0];
575 H[0][2] = H[2][0] / H[0][0];
576 H[0][3] = H[3][0] / H[0][0];
577 H[1][1] -= H[0][0] * H[0][1] * H[0][1];
578 H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
579 H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
580 H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
581 H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
582 H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
583
584 /* Compute update as delta_gains = -inv(H) * grad */
585 /* copy and negate */
586 for (k = 0; k < 4; k++)
587 dG[k] = -grad[k];
588 /* back substitution */
589 dG[1] -= dG[0] * H[0][1];
590 dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
591 dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
592 /* scale */
593 for (k = 0; k < 4; k++)
594 dG[k] /= H[k][k];
595 /* back substitution */
596 dG[2] -= dG[3] * H[2][3];
597 dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
598 dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
599
600 /* update gains and check range */
601 for (k = 0; k < 4; k++) {
602 gains[k] += dG[k];
603 if (gains[k] > PITCH_MAX_GAIN)
604 gains[k] = PITCH_MAX_GAIN;
605 else if (gains[k] < 0.0)
606 gains[k] = 0.0;
607 }
608 }
609
610 /* update state for next frame */
611 WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
612
613 /* concatenate previous input's end and current input */
614 memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
615 memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
616
617 /* lookahead pitch filtering for masking analysis */
618 WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
619
620 /* store last part of input */
621 for (k = 0; k < QLOOKAHEAD; k++)
622 State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
623 }
624