1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <math.h>
12 #include <memory.h>
13 #include <stdlib.h>
14
15 #include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
16 #include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h"
17 #include "rtc_base/compile_assert_c.h"
18
19 /*
20 * We are implementing the following filters;
21 *
22 * Pre-filtering:
23 * y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
24 *
25 * Post-filtering:
26 * y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
27 *
28 * Note that |lag| is a floating number so we perform an interpolation to
29 * obtain the correct |lag|.
30 *
31 */
32
33 static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25,
34 -0.07};
35
36 /* interpolation coefficients; generated by design_pitch_filter.m */
37 static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
38 {-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125,
39 0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109,
40 0.01754159521746},
41 {-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643,
42 0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084,
43 0.01654127246314},
44 {-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787,
45 0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311,
46 0.01295781500709},
47 {-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393,
48 0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166,
49 0.00719783432422},
50 {-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001,
51 0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000,
52 -0.00000000000000},
53 {0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562,
54 0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377,
55 -0.00764851320885},
56 {0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650,
57 0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059,
58 -0.01463300534216},
59 {0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190,
60 0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865,
61 -0.01985640750433}
62 };
63
64 /*
65 * Enumerating the operation of the filter.
66 * iSAC has 4 different pitch-filter which are very similar in their structure.
67 *
68 * kPitchFilterPre : In this mode the filter is operating as pitch
69 * pre-filter. This is used at the encoder.
70 * kPitchFilterPost : In this mode the filter is operating as pitch
71 * post-filter. This is the inverse of pre-filter and used
72 * in the decoder.
73 * kPitchFilterPreLa : This is, in structure, similar to pre-filtering but
74 * utilizing 3 millisecond lookahead. It is used to
75 * obtain the signal for LPC analysis.
76 * kPitchFilterPreGain : This is, in structure, similar to pre-filtering but
77 * differential changes in gain is considered. This is
78 * used to find the optimal gain.
79 */
80 typedef enum {
81 kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain
82 } PitchFilterOperation;
83
84 /*
85 * Structure with parameters used for pitch-filtering.
86 * buffer : a buffer where the sum of previous inputs and outputs
87 * are stored.
88 * damper_state : the state of the damping filter. The filter is defined by
89 * |kDampFilter|.
90 * interpol_coeff : pointer to a set of coefficient which are used to utilize
91 * fractional pitch by interpolation.
92 * gain : pitch-gain to be applied to the current segment of input.
93 * lag : pitch-lag for the current segment of input.
94 * lag_offset : the offset of lag w.r.t. current sample.
95 * sub_frame : sub-frame index, there are 4 pitch sub-frames in an iSAC
96 * frame.
97 * This specifies the usage of the filter. See
98 * 'PitchFilterOperation' for operational modes.
99 * num_samples : number of samples to be processed in each segment.
100 * index : index of the input and output sample.
101 * damper_state_dg : state of damping filter for different trial gains.
102 * gain_mult : differential changes to gain.
103 */
104 typedef struct {
105 double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD];
106 double damper_state[PITCH_DAMPORDER];
107 const double *interpol_coeff;
108 double gain;
109 double lag;
110 int lag_offset;
111
112 int sub_frame;
113 PitchFilterOperation mode;
114 int num_samples;
115 int index;
116
117 double damper_state_dg[4][PITCH_DAMPORDER];
118 double gain_mult[4];
119 } PitchFilterParam;
120
121 /**********************************************************************
122 * FilterSegment()
123 * Filter one segment, a quarter of a frame.
124 *
125 * Inputs
126 * in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
127 * filter_param : pitch filter parameters.
128 *
129 * Outputs
130 * out_data : pointer to a buffer where the filtered signal is written to.
131 * out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
132 * where the output of different gain values (differential
133 * change to gain) is written.
134 */
FilterSegment(const double * in_data,PitchFilterParam * parameters,double * out_data,double out_dg[][PITCH_FRAME_LEN+QLOOKAHEAD])135 static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
136 double* out_data,
137 double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
138 int n;
139 int m;
140 int j;
141 double sum;
142 double sum2;
143 /* Index of |parameters->buffer| where the output is written to. */
144 int pos = parameters->index + PITCH_BUFFSIZE;
145 /* Index of |parameters->buffer| where samples are read for fractional-lag
146 * computation. */
147 int pos_lag = pos - parameters->lag_offset;
148
149 for (n = 0; n < parameters->num_samples; ++n) {
150 /* Shift low pass filter states. */
151 for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
152 parameters->damper_state[m] = parameters->damper_state[m - 1];
153 }
154 /* Filter to get fractional pitch. */
155 sum = 0.0;
156 for (m = 0; m < PITCH_FRACORDER; ++m) {
157 sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m];
158 }
159 /* Multiply with gain. */
160 parameters->damper_state[0] = parameters->gain * sum;
161
162 if (parameters->mode == kPitchFilterPreGain) {
163 int lag_index = parameters->index - parameters->lag_offset;
164 int m_tmp = (lag_index < 0) ? -lag_index : 0;
165 /* Update the damper state for the new sample. */
166 for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
167 for (j = 0; j < 4; ++j) {
168 parameters->damper_state_dg[j][m] =
169 parameters->damper_state_dg[j][m - 1];
170 }
171 }
172
173 for (j = 0; j < parameters->sub_frame + 1; ++j) {
174 /* Filter for fractional pitch. */
175 sum2 = 0.0;
176 for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
177 /* |lag_index + m| is always larger than or equal to zero, see how
178 * m_tmp is computed. This is equivalent to assume samples outside
179 * |out_dg[j]| are zero. */
180 sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
181 }
182 /* Add the contribution of differential gain change. */
183 parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum +
184 parameters->gain * sum2;
185 }
186
187 /* Filter with damping filter, and store the results. */
188 for (j = 0; j < parameters->sub_frame + 1; ++j) {
189 sum = 0.0;
190 for (m = 0; m < PITCH_DAMPORDER; ++m) {
191 sum -= parameters->damper_state_dg[j][m] * kDampFilter[m];
192 }
193 out_dg[j][parameters->index] = sum;
194 }
195 }
196 /* Filter with damping filter. */
197 sum = 0.0;
198 for (m = 0; m < PITCH_DAMPORDER; ++m) {
199 sum += parameters->damper_state[m] * kDampFilter[m];
200 }
201
202 /* Subtract from input and update buffer. */
203 out_data[parameters->index] = in_data[parameters->index] - sum;
204 parameters->buffer[pos] = in_data[parameters->index] +
205 out_data[parameters->index];
206
207 ++parameters->index;
208 ++pos;
209 ++pos_lag;
210 }
211 return;
212 }
213
214 /* Update filter parameters based on the pitch-gains and pitch-lags. */
Update(PitchFilterParam * parameters)215 static void Update(PitchFilterParam* parameters) {
216 double fraction;
217 int fraction_index;
218 /* Compute integer lag-offset. */
219 parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY +
220 0.5);
221 /* Find correct set of coefficients for computing fractional pitch. */
222 fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
223 fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
224 parameters->interpol_coeff = kIntrpCoef[fraction_index];
225
226 if (parameters->mode == kPitchFilterPreGain) {
227 /* If in this mode make a differential change to pitch gain. */
228 parameters->gain_mult[parameters->sub_frame] += 0.2;
229 if (parameters->gain_mult[parameters->sub_frame] > 1.0) {
230 parameters->gain_mult[parameters->sub_frame] = 1.0;
231 }
232 if (parameters->sub_frame > 0) {
233 parameters->gain_mult[parameters->sub_frame - 1] -= 0.2;
234 }
235 }
236 }
237
238 /******************************************************************************
239 * FilterFrame()
240 * Filter a frame of 30 millisecond, given pitch-lags and pitch-gains.
241 *
242 * Inputs
243 * in_data : pointer to the input signal of 30 ms at 8 kHz sample-rate.
244 * lags : pointer to pitch-lags, 4 lags per frame.
245 * gains : pointer to pitch-gians, 4 gains per frame.
246 * mode : defining the functionality of the filter. It takes the
247 * following values.
248 * kPitchFilterPre: Pitch pre-filter, used at encoder.
249 * kPitchFilterPost: Pitch post-filter, used at decoder.
250 * kPitchFilterPreLa: Pitch pre-filter with lookahead.
251 * kPitchFilterPreGain: Pitch pre-filter used to otain optimal
252 * pitch-gains.
253 *
254 * Outputs
255 * out_data : pointer to a buffer where the filtered signal is written to.
256 * out_dg : [only used in kPitchFilterPreGain] pointer to a buffer
257 * where the output of different gain values (differential
258 * change to gain) is written.
259 */
FilterFrame(const double * in_data,PitchFiltstr * filter_state,double * lags,double * gains,PitchFilterOperation mode,double * out_data,double out_dg[][PITCH_FRAME_LEN+QLOOKAHEAD])260 static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
261 double* lags, double* gains, PitchFilterOperation mode,
262 double* out_data,
263 double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
264 PitchFilterParam filter_parameters;
265 double gain_delta, lag_delta;
266 double old_lag, old_gain;
267 int n;
268 int m;
269 const double kEnhancer = 1.3;
270
271 /* Set up buffer and states. */
272 filter_parameters.index = 0;
273 filter_parameters.lag_offset = 0;
274 filter_parameters.mode = mode;
275 /* Copy states to local variables. */
276 memcpy(filter_parameters.buffer, filter_state->ubuf,
277 sizeof(filter_state->ubuf));
278 RTC_COMPILE_ASSERT(sizeof(filter_parameters.buffer) >=
279 sizeof(filter_state->ubuf));
280 memset(filter_parameters.buffer +
281 sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]),
282 0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf));
283 memcpy(filter_parameters.damper_state, filter_state->ystate,
284 sizeof(filter_state->ystate));
285
286 if (mode == kPitchFilterPreGain) {
287 /* Clear buffers. */
288 memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult));
289 memset(filter_parameters.damper_state_dg, 0,
290 sizeof(filter_parameters.damper_state_dg));
291 for (n = 0; n < PITCH_SUBFRAMES; ++n) {
292 //memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD));
293 memset(out_dg[n], 0, sizeof(out_dg[n]));
294 }
295 } else if (mode == kPitchFilterPost) {
296 /* Make output more periodic. Negative sign is to change the structure
297 * of the filter. */
298 for (n = 0; n < PITCH_SUBFRAMES; ++n) {
299 gains[n] *= -kEnhancer;
300 }
301 }
302
303 old_lag = *filter_state->oldlagp;
304 old_gain = *filter_state->oldgainp;
305
306 /* No interpolation if pitch lag step is big. */
307 if ((lags[0] > (PITCH_UPSTEP * old_lag)) ||
308 (lags[0] < (PITCH_DOWNSTEP * old_lag))) {
309 old_lag = lags[0];
310 old_gain = gains[0];
311
312 if (mode == kPitchFilterPreGain) {
313 filter_parameters.gain_mult[0] = 1.0;
314 }
315 }
316
317 filter_parameters.num_samples = PITCH_UPDATE;
318 for (m = 0; m < PITCH_SUBFRAMES; ++m) {
319 /* Set the sub-frame value. */
320 filter_parameters.sub_frame = m;
321 /* Calculate interpolation steps for pitch-lag and pitch-gain. */
322 lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME;
323 filter_parameters.lag = old_lag;
324 gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME;
325 filter_parameters.gain = old_gain;
326 /* Store for the next sub-frame. */
327 old_lag = lags[m];
328 old_gain = gains[m];
329
330 for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) {
331 /* Step-wise interpolation of pitch gains and lags. As pitch-lag changes,
332 * some parameters of filter need to be update. */
333 filter_parameters.gain += gain_delta;
334 filter_parameters.lag += lag_delta;
335 /* Update parameters according to new lag value. */
336 Update(&filter_parameters);
337 /* Filter a segment of input. */
338 FilterSegment(in_data, &filter_parameters, out_data, out_dg);
339 }
340 }
341
342 if (mode != kPitchFilterPreGain) {
343 /* Export buffer and states. */
344 memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN],
345 sizeof(filter_state->ubuf));
346 memcpy(filter_state->ystate, filter_parameters.damper_state,
347 sizeof(filter_state->ystate));
348
349 /* Store for the next frame. */
350 *filter_state->oldlagp = old_lag;
351 *filter_state->oldgainp = old_gain;
352 }
353
354 if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
355 /* Filter the lookahead segment, this is treated as the last sub-frame. So
356 * set |pf_param| to last sub-frame. */
357 filter_parameters.sub_frame = PITCH_SUBFRAMES - 1;
358 filter_parameters.num_samples = QLOOKAHEAD;
359 FilterSegment(in_data, &filter_parameters, out_data, out_dg);
360 }
361 }
362
WebRtcIsac_PitchfilterPre(double * in_data,double * out_data,PitchFiltstr * pf_state,double * lags,double * gains)363 void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data,
364 PitchFiltstr* pf_state, double* lags,
365 double* gains) {
366 FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL);
367 }
368
WebRtcIsac_PitchfilterPre_la(double * in_data,double * out_data,PitchFiltstr * pf_state,double * lags,double * gains)369 void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data,
370 PitchFiltstr* pf_state, double* lags,
371 double* gains) {
372 FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data,
373 NULL);
374 }
375
WebRtcIsac_PitchfilterPre_gains(double * in_data,double * out_data,double out_dg[][PITCH_FRAME_LEN+QLOOKAHEAD],PitchFiltstr * pf_state,double * lags,double * gains)376 void WebRtcIsac_PitchfilterPre_gains(
377 double* in_data, double* out_data,
378 double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state,
379 double* lags, double* gains) {
380 FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data,
381 out_dg);
382 }
383
WebRtcIsac_PitchfilterPost(double * in_data,double * out_data,PitchFiltstr * pf_state,double * lags,double * gains)384 void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data,
385 PitchFiltstr* pf_state, double* lags,
386 double* gains) {
387 FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL);
388 }
389