1 /*
2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
12
13 #include <math.h>
14 #include <memory.h>
15 #include <string.h>
16 #ifdef WEBRTC_ANDROID
17 #include <stdlib.h>
18 #endif
19
20 #include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
21 #include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h"
22 #include "rtc_base/system/ignore_warnings.h"
23
24 static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160,
25 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640};
26
27 /* interpolation filter */
IntrepolFilter(double * data_ptr,double * intrp)28 __inline static void IntrepolFilter(double *data_ptr, double *intrp)
29 {
30 *intrp = kInterpolWin[0] * data_ptr[-3];
31 *intrp += kInterpolWin[1] * data_ptr[-2];
32 *intrp += kInterpolWin[2] * data_ptr[-1];
33 *intrp += kInterpolWin[3] * data_ptr[0];
34 *intrp += kInterpolWin[4] * data_ptr[1];
35 *intrp += kInterpolWin[5] * data_ptr[2];
36 *intrp += kInterpolWin[6] * data_ptr[3];
37 *intrp += kInterpolWin[7] * data_ptr[4];
38 }
39
40
41 /* 2D parabolic interpolation */
42 /* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */
Intrpol2D(double T[3][3],double * x,double * y,double * peak_val)43 __inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val)
44 {
45 double c, b[2], A[2][2];
46 double t1, t2, d;
47 double delta1, delta2;
48
49
50 // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
51 // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
52
53 c = T[1][1];
54 b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]);
55 b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]);
56 A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]);
57 t1 = 0.5 * (T[0][0] + T[2][2]) - c;
58 t2 = 0.5 * (T[2][0] + T[0][2]) - c;
59 d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2;
60 A[0][0] = -t1 - 0.5 * d;
61 A[1][1] = -t2 - 0.5 * d;
62
63 /* deal with singularities or ill-conditioned cases */
64 if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) {
65 *peak_val = T[1][1];
66 return;
67 }
68
69 /* Cholesky decomposition: replace A by upper-triangular factor */
70 A[0][0] = sqrt(A[0][0]);
71 A[0][1] = A[0][1] / A[0][0];
72 A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]);
73
74 /* compute [x; y] = -0.5 * inv(A) * b */
75 t1 = b[0] / A[0][0];
76 t2 = (b[1] - t1 * A[0][1]) / A[1][1];
77 delta2 = t2 / A[1][1];
78 delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0];
79 delta2 *= 0.5;
80
81 /* limit norm */
82 t1 = delta1 * delta1 + delta2 * delta2;
83 if (t1 > 1.0) {
84 delta1 /= t1;
85 delta2 /= t1;
86 }
87
88 *peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c;
89
90 *x += delta1;
91 *y += delta2;
92 }
93
94
PCorr(const double * in,double * outcorr)95 static void PCorr(const double *in, double *outcorr)
96 {
97 double sum, ysum, prod;
98 const double *x, *inptr;
99 int k, n;
100
101 //ysum = 1e-6; /* use this with float (i.s.o. double)! */
102 ysum = 1e-13;
103 sum = 0.0;
104 x = in + PITCH_MAX_LAG/2 + 2;
105 for (n = 0; n < PITCH_CORR_LEN2; n++) {
106 ysum += in[n] * in[n];
107 sum += x[n] * in[n];
108 }
109
110 outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */
111 *outcorr = sum / sqrt(ysum);
112
113 for (k = 1; k < PITCH_LAG_SPAN2; k++) {
114 ysum -= in[k-1] * in[k-1];
115 ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1];
116 sum = 0.0;
117 inptr = &in[k];
118 prod = x[0] * inptr[0];
119 for (n = 1; n < PITCH_CORR_LEN2; n++) {
120 sum += prod;
121 prod = x[n] * inptr[n];
122 }
123 sum += prod;
124 outcorr--;
125 *outcorr = sum / sqrt(ysum);
126 }
127 }
128
WebRtcIsac_AllpassFilterForDec(double * InOut,const double * APSectionFactors,size_t lengthInOut,double * FilterState)129 static void WebRtcIsac_AllpassFilterForDec(double* InOut,
130 const double* APSectionFactors,
131 size_t lengthInOut,
132 double* FilterState) {
133 // This performs all-pass filtering--a series of first order all-pass
134 // sections are used to filter the input in a cascade manner.
135 size_t n, j;
136 double temp;
137 for (j = 0; j < ALLPASSSECTIONS; j++) {
138 for (n = 0; n < lengthInOut; n += 2) {
139 temp = InOut[n]; // store input
140 InOut[n] = FilterState[j] + APSectionFactors[j] * temp;
141 FilterState[j] = -APSectionFactors[j] * InOut[n] + temp;
142 }
143 }
144 }
145
WebRtcIsac_DecimateAllpass(const double * in,double * state_in,size_t N,double * out)146 static void WebRtcIsac_DecimateAllpass(
147 const double* in,
148 double* state_in, // array of size: 2*ALLPASSSECTIONS+1
149 size_t N, // number of input samples
150 double* out) { // array of size N/2
151
152 static const double APupper[ALLPASSSECTIONS] = {0.0347, 0.3826};
153 static const double APlower[ALLPASSSECTIONS] = {0.1544, 0.744};
154
155 size_t n;
156 double data_vec[PITCH_FRAME_LEN];
157
158 /* copy input */
159 memcpy(data_vec + 1, in, sizeof(double) * (N - 1));
160
161 data_vec[0] = state_in[2 * ALLPASSSECTIONS]; // the z^(-1) state
162 state_in[2 * ALLPASSSECTIONS] = in[N - 1];
163
164 WebRtcIsac_AllpassFilterForDec(data_vec + 1, APupper, N, state_in);
165 WebRtcIsac_AllpassFilterForDec(data_vec, APlower, N,
166 state_in + ALLPASSSECTIONS);
167
168 for (n = 0; n < N / 2; n++)
169 out[n] = data_vec[2 * n] + data_vec[2 * n + 1];
170 }
171
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()172 RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
173
174 static void WebRtcIsac_InitializePitch(const double* in,
175 const double old_lag,
176 const double old_gain,
177 PitchAnalysisStruct* State,
178 double* lags) {
179 double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2];
180 double ratio, log_lag, gain_bias;
181 double bias;
182 double corrvec1[PITCH_LAG_SPAN2];
183 double corrvec2[PITCH_LAG_SPAN2];
184 int m, k;
185 // Allocating 10 extra entries at the begining of the CorrSurf
186 double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)];
187 double* CorrSurf[2*PITCH_BW+3];
188 double *CorrSurfPtr1, *CorrSurfPtr2;
189 double LagWin[3] = {0.2, 0.5, 0.98};
190 int ind1, ind2, peaks_ind, peak, max_ind;
191 int peaks[PITCH_MAX_NUM_PEAKS];
192 double adj, gain_tmp;
193 double corr, corr_max;
194 double intrp_a, intrp_b, intrp_c, intrp_d;
195 double peak_vals[PITCH_MAX_NUM_PEAKS];
196 double lags1[PITCH_MAX_NUM_PEAKS];
197 double lags2[PITCH_MAX_NUM_PEAKS];
198 double T[3][3];
199 int row;
200
201 for(k = 0; k < 2*PITCH_BW+3; k++)
202 {
203 CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)];
204 }
205 /* reset CorrSurf matrix */
206 memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4)));
207
208 //warnings -DH
209 max_ind = 0;
210 peak = 0;
211
212 /* copy old values from state buffer */
213 memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
214
215 /* decimation; put result after the old values */
216 WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN,
217 &buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]);
218
219 /* low-pass filtering */
220 for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++)
221 buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2];
222
223 /* copy end part back into state buffer */
224 memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2));
225
226 /* compute correlation for first and second half of the frame */
227 PCorr(buf_dec, corrvec1);
228 PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2);
229
230 /* bias towards pitch lag of previous frame */
231 log_lag = log(0.5 * old_lag);
232 gain_bias = 4.0 * old_gain * old_gain;
233 if (gain_bias > 0.8) gain_bias = 0.8;
234 for (k = 0; k < PITCH_LAG_SPAN2; k++)
235 {
236 ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag;
237 bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
238 corrvec1[k] *= bias;
239 }
240
241 /* taper correlation functions */
242 for (k = 0; k < 3; k++) {
243 gain_tmp = LagWin[k];
244 corrvec1[k] *= gain_tmp;
245 corrvec2[k] *= gain_tmp;
246 corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
247 corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp;
248 }
249
250 corr_max = 0.0;
251 /* fill middle row of correlation surface */
252 ind1 = 0;
253 ind2 = 0;
254 CorrSurfPtr1 = &CorrSurf[PITCH_BW][2];
255 for (k = 0; k < PITCH_LAG_SPAN2; k++) {
256 corr = corrvec1[ind1++] + corrvec2[ind2++];
257 CorrSurfPtr1[k] = corr;
258 if (corr > corr_max) {
259 corr_max = corr; /* update maximum */
260 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
261 }
262 }
263 /* fill first and last rows of correlation surface */
264 ind1 = 0;
265 ind2 = PITCH_BW;
266 CorrSurfPtr1 = &CorrSurf[0][2];
267 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2];
268 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) {
269 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
270 adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
271 corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
272 CorrSurfPtr1[k] = corr;
273 if (corr > corr_max) {
274 corr_max = corr; /* update maximum */
275 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
276 }
277 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
278 CorrSurfPtr2[k] = corr;
279 if (corr > corr_max) {
280 corr_max = corr; /* update maximum */
281 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
282 }
283 }
284 /* fill second and next to last rows of correlation surface */
285 ind1 = 0;
286 ind2 = PITCH_BW-1;
287 CorrSurfPtr1 = &CorrSurf[1][2];
288 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1];
289 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) {
290 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
291 adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
292 corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
293 CorrSurfPtr1[k] = corr;
294 if (corr > corr_max) {
295 corr_max = corr; /* update maximum */
296 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
297 }
298 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
299 CorrSurfPtr2[k] = corr;
300 if (corr > corr_max) {
301 corr_max = corr; /* update maximum */
302 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
303 }
304 }
305 /* fill remainder of correlation surface */
306 for (m = 2; m < PITCH_BW; m++) {
307 ind1 = 0;
308 ind2 = PITCH_BW - m; /* always larger than ind1 */
309 CorrSurfPtr1 = &CorrSurf[m][2];
310 CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m];
311 for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) {
312 ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12));
313 adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */
314 corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
315 CorrSurfPtr1[k] = corr;
316 if (corr > corr_max) {
317 corr_max = corr; /* update maximum */
318 max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
319 }
320 corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]);
321 CorrSurfPtr2[k] = corr;
322 if (corr > corr_max) {
323 corr_max = corr; /* update maximum */
324 max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]);
325 }
326 }
327 }
328
329 /* threshold value to qualify as a peak */
330 corr_max *= 0.6;
331
332 peaks_ind = 0;
333 /* find peaks */
334 for (m = 1; m < PITCH_BW+1; m++) {
335 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
336 CorrSurfPtr1 = &CorrSurf[m][2];
337 for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) {
338 corr = CorrSurfPtr1[k];
339 if (corr > corr_max) {
340 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
341 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
342 /* found a peak; store index into matrix */
343 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
344 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
345 }
346 }
347 }
348 }
349 }
350 for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) {
351 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
352 CorrSurfPtr1 = &CorrSurf[m][2];
353 for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) {
354 corr = CorrSurfPtr1[k];
355 if (corr > corr_max) {
356 if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) {
357 if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) {
358 /* found a peak; store index into matrix */
359 peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
360 if (peaks_ind == PITCH_MAX_NUM_PEAKS) break;
361 }
362 }
363 }
364 }
365 }
366
367 if (peaks_ind > 0) {
368 /* examine each peak */
369 CorrSurfPtr1 = &CorrSurf[0][0];
370 for (k = 0; k < peaks_ind; k++) {
371 peak = peaks[k];
372
373 /* compute four interpolated values around current peak */
374 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a);
375 IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b);
376 IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c);
377 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d);
378
379 /* determine maximum of the interpolated values */
380 corr = CorrSurfPtr1[peak];
381 corr_max = intrp_a;
382 if (intrp_b > corr_max) corr_max = intrp_b;
383 if (intrp_c > corr_max) corr_max = intrp_c;
384 if (intrp_d > corr_max) corr_max = intrp_d;
385
386 /* determine where the peak sits and fill a 3x3 matrix around it */
387 row = peak / (PITCH_LAG_SPAN2+4);
388 lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
389 lags2[k] = (double) (lags1[k] + PITCH_BW - row);
390 if ( corr > corr_max ) {
391 T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
392 T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
393 T[1][1] = corr;
394 T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
395 T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
396 T[1][0] = intrp_a;
397 T[0][1] = intrp_b;
398 T[2][1] = intrp_c;
399 T[1][2] = intrp_d;
400 } else {
401 if (intrp_a == corr_max) {
402 lags1[k] -= 0.5;
403 lags2[k] += 0.5;
404 IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]);
405 IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]);
406 T[1][1] = intrp_a;
407 T[0][2] = intrp_b;
408 T[2][2] = intrp_c;
409 T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)];
410 T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
411 T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
412 T[1][2] = corr;
413 } else if (intrp_b == corr_max) {
414 lags1[k] -= 0.5;
415 lags2[k] -= 0.5;
416 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]);
417 T[2][0] = intrp_a;
418 T[1][1] = intrp_b;
419 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]);
420 T[2][2] = intrp_d;
421 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)];
422 T[0][1] = CorrSurfPtr1[peak - 1];
423 T[2][1] = corr;
424 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
425 } else if (intrp_c == corr_max) {
426 lags1[k] += 0.5;
427 lags2[k] += 0.5;
428 T[0][0] = intrp_a;
429 IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]);
430 T[1][1] = intrp_c;
431 T[0][2] = intrp_d;
432 IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]);
433 T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)];
434 T[0][1] = corr;
435 T[2][1] = CorrSurfPtr1[peak + 1];
436 T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
437 } else {
438 lags1[k] += 0.5;
439 lags2[k] -= 0.5;
440 T[0][0] = intrp_b;
441 T[2][0] = intrp_c;
442 T[1][1] = intrp_d;
443 IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]);
444 IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]);
445 T[1][0] = corr;
446 T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)];
447 T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)];
448 T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)];
449 }
450 }
451
452 /* 2D parabolic interpolation gives more accurate lags and peak value */
453 Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]);
454 }
455
456 /* determine the highest peak, after applying a bias towards short lags */
457 corr_max = 0.0;
458 for (k = 0; k < peaks_ind; k++) {
459 corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k]));
460 if (corr > corr_max) {
461 corr_max = corr;
462 peak = k;
463 }
464 }
465
466 lags1[peak] *= 2.0;
467 lags2[peak] *= 2.0;
468
469 if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG;
470 if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG;
471 if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG;
472 if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG;
473
474 /* store lags of highest peak in output array */
475 lags[0] = lags1[peak];
476 lags[1] = lags1[peak];
477 lags[2] = lags2[peak];
478 lags[3] = lags2[peak];
479 }
480 else
481 {
482 row = max_ind / (PITCH_LAG_SPAN2+4);
483 lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4);
484 lags2[0] = (double) (lags1[0] + PITCH_BW - row);
485
486 if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG;
487 if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG;
488 if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG;
489 if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG;
490
491 /* store lags of highest peak in output array */
492 lags[0] = lags1[0];
493 lags[1] = lags1[0];
494 lags[2] = lags2[0];
495 lags[3] = lags2[0];
496 }
497 }
498
499 RTC_POP_IGNORING_WFRAME_LARGER_THAN()
500
501 /* create weighting matrix by orthogonalizing a basis of polynomials of increasing order
502 * t = (0:4)';
503 * A = [t.^0, t.^1, t.^2, t.^3, t.^4];
504 * [Q, dummy] = qr(A);
505 * P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */
506 static const double kWeight[5][5] = {
507 { 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286},
508 {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857},
509 {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286},
510 { 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143},
511 { 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286}
512 };
513
514 /* second order high-pass filter */
WebRtcIsac_Highpass(const double * in,double * out,double * state,size_t N)515 static void WebRtcIsac_Highpass(const double* in,
516 double* out,
517 double* state,
518 size_t N) {
519 /* create high-pass filter ocefficients
520 * z = 0.998 * exp(j*2*pi*35/8000);
521 * p = 0.94 * exp(j*2*pi*140/8000);
522 * HP_b = [1, -2*real(z), abs(z)^2];
523 * HP_a = [1, -2*real(p), abs(p)^2]; */
524 static const double a_coef[2] = { 1.86864659625574, -0.88360000000000};
525 static const double b_coef[2] = {-1.99524591718270, 0.99600400000000};
526
527 size_t k;
528
529 for (k=0; k<N; k++) {
530 *out = *in + state[1];
531 state[1] = state[0] + b_coef[0] * *in + a_coef[0] * *out;
532 state[0] = b_coef[1] * *in++ + a_coef[1] * *out++;
533 }
534 }
535
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()536 RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
537
538 void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */
539 double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
540 PitchAnalysisStruct *State,
541 double *lags,
542 double *gains)
543 {
544 double HPin[PITCH_FRAME_LEN];
545 double Weighted[PITCH_FRAME_LEN];
546 double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
547 double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
548 double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead
549 double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
550 double old_lag, old_gain;
551 double nrg_wht, tmp;
552 double Wnrg, Wfluct, Wgain;
553 double H[4][4];
554 double grad[4];
555 double dG[4];
556 int k, m, n, iter;
557
558 /* high pass filtering using second order pole-zero filter */
559 WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
560
561 /* copy from state into buffer */
562 memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
563
564 /* compute weighted and whitened signals */
565 WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
566
567 /* copy from buffer into state */
568 memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
569
570 old_lag = State->PFstr_wght.oldlagp[0];
571 old_gain = State->PFstr_wght.oldgainp[0];
572
573 /* inital pitch estimate */
574 WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
575
576
577 /* Iterative optimization of lags - to be done */
578
579 /* compute energy of whitened signal */
580 nrg_wht = 0.0;
581 for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
582 nrg_wht += Whitened[k] * Whitened[k];
583
584
585 /* Iterative optimization of gains */
586
587 /* set weights for energy, gain fluctiation, and spectral gain penalty functions */
588 Wnrg = 1.0 / nrg_wht;
589 Wgain = 0.005;
590 Wfluct = 3.0;
591
592 /* set initial gains */
593 for (k = 0; k < 4; k++)
594 gains[k] = PITCH_MAX_GAIN_06;
595
596 /* two iterations should be enough */
597 for (iter = 0; iter < 2; iter++) {
598 /* compute Jacobian of pre-filter output towards gains */
599 WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
600
601 /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
602 for (k = 0; k < 4; k++) {
603 tmp = 0.0;
604 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
605 tmp += out_G[n] * out_dG[k][n];
606 grad[k] = tmp * Wnrg;
607 }
608 for (k = 0; k < 4; k++) {
609 for (m = 0; m <= k; m++) {
610 tmp = 0.0;
611 for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
612 tmp += out_dG[m][n] * out_dG[k][n];
613 H[k][m] = tmp * Wnrg;
614 }
615 }
616
617 /* add gradient and Hessian (lower triangle) for dampening fast gain changes */
618 for (k = 0; k < 4; k++) {
619 tmp = kWeight[k+1][0] * old_gain;
620 for (m = 0; m < 4; m++)
621 tmp += kWeight[k+1][m+1] * gains[m];
622 grad[k] += tmp * Wfluct;
623 }
624 for (k = 0; k < 4; k++) {
625 for (m = 0; m <= k; m++) {
626 H[k][m] += kWeight[k+1][m+1] * Wfluct;
627 }
628 }
629
630 /* add gradient and Hessian for dampening gain */
631 for (k = 0; k < 3; k++) {
632 tmp = 1.0 / (1 - gains[k]);
633 grad[k] += tmp * tmp * Wgain;
634 H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
635 }
636 tmp = 1.0 / (1 - gains[3]);
637 grad[3] += 1.33 * (tmp * tmp * Wgain);
638 H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
639
640
641 /* compute Cholesky factorization of Hessian
642 * by overwritting the upper triangle; scale factors on diagonal
643 * (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */
644 H[0][1] = H[1][0] / H[0][0];
645 H[0][2] = H[2][0] / H[0][0];
646 H[0][3] = H[3][0] / H[0][0];
647 H[1][1] -= H[0][0] * H[0][1] * H[0][1];
648 H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
649 H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
650 H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
651 H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
652 H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
653
654 /* Compute update as delta_gains = -inv(H) * grad */
655 /* copy and negate */
656 for (k = 0; k < 4; k++)
657 dG[k] = -grad[k];
658 /* back substitution */
659 dG[1] -= dG[0] * H[0][1];
660 dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
661 dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
662 /* scale */
663 for (k = 0; k < 4; k++)
664 dG[k] /= H[k][k];
665 /* back substitution */
666 dG[2] -= dG[3] * H[2][3];
667 dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
668 dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
669
670 /* update gains and check range */
671 for (k = 0; k < 4; k++) {
672 gains[k] += dG[k];
673 if (gains[k] > PITCH_MAX_GAIN)
674 gains[k] = PITCH_MAX_GAIN;
675 else if (gains[k] < 0.0)
676 gains[k] = 0.0;
677 }
678 }
679
680 /* update state for next frame */
681 WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
682
683 /* concatenate previous input's end and current input */
684 memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
685 memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
686
687 /* lookahead pitch filtering for masking analysis */
688 WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
689
690 /* store last part of input */
691 for (k = 0; k < QLOOKAHEAD; k++)
692 State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
693 }
694
695 RTC_POP_IGNORING_WFRAME_LARGER_THAN()
696