• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <math.h>
12 #include <memory.h>
13 #include <stdlib.h>
14 
15 #include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
16 #include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h"
17 #include "rtc_base/compile_assert_c.h"
18 
19 /*
20  * We are implementing the following filters;
21  *
22  * Pre-filtering:
23  *   y(z) = x(z) + damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
24  *
25  * Post-filtering:
26  *   y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
27  *
28  * Note that |lag| is a floating number so we perform an interpolation to
29  * obtain the correct |lag|.
30  *
31  */
32 
33 static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25,
34     -0.07};
35 
36 /* interpolation coefficients; generated by design_pitch_filter.m */
37 static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
38     {-0.02239172458614,  0.06653315052934, -0.16515880017569,  0.60701333734125,
39      0.64671399919202, -0.20249000396417,  0.09926548334755, -0.04765933793109,
40      0.01754159521746},
41     {-0.01985640750434,  0.05816126837866, -0.13991265473714,  0.44560418147643,
42      0.79117042386876, -0.20266133815188,  0.09585268418555, -0.04533310458084,
43      0.01654127246314},
44     {-0.01463300534216,  0.04229888475060, -0.09897034715253,  0.28284326017787,
45      0.90385267956632, -0.16976950138649,  0.07704272393639, -0.03584218578311,
46      0.01295781500709},
47     {-0.00764851320885,  0.02184035544377, -0.04985561057281,  0.13083306574393,
48      0.97545011664662, -0.10177807997561,  0.04400901776474, -0.02010737175166,
49      0.00719783432422},
50     {-0.00000000000000,  0.00000000000000, -0.00000000000001,  0.00000000000001,
51      0.99999999999999,  0.00000000000001, -0.00000000000001,  0.00000000000000,
52      -0.00000000000000},
53     {0.00719783432422, -0.02010737175166,  0.04400901776474, -0.10177807997562,
54      0.97545011664663,  0.13083306574393, -0.04985561057280,  0.02184035544377,
55      -0.00764851320885},
56     {0.01295781500710, -0.03584218578312,  0.07704272393640, -0.16976950138650,
57      0.90385267956634,  0.28284326017785, -0.09897034715252,  0.04229888475059,
58      -0.01463300534216},
59     {0.01654127246315, -0.04533310458085,  0.09585268418557, -0.20266133815190,
60      0.79117042386878,  0.44560418147640, -0.13991265473712,  0.05816126837865,
61      -0.01985640750433}
62 };
63 
64 /*
65  * Enumerating the operation of the filter.
66  * iSAC has 4 different pitch-filter which are very similar in their structure.
67  *
68  * kPitchFilterPre     : In this mode the filter is operating as pitch
69  *                       pre-filter. This is used at the encoder.
70  * kPitchFilterPost    : In this mode the filter is operating as pitch
71  *                       post-filter. This is the inverse of pre-filter and used
72  *                       in the decoder.
73  * kPitchFilterPreLa   : This is, in structure, similar to pre-filtering but
74  *                       utilizing 3 millisecond lookahead. It is used to
75  *                       obtain the signal for LPC analysis.
76  * kPitchFilterPreGain : This is, in structure, similar to pre-filtering but
77  *                       differential changes in gain is considered. This is
78  *                       used to find the optimal gain.
79  */
80 typedef enum {
81   kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain
82 } PitchFilterOperation;
83 
84 /*
85  * Structure with parameters used for pitch-filtering.
86  * buffer           : a buffer where the sum of previous inputs and outputs
87  *                    are stored.
88  * damper_state     : the state of the damping filter. The filter is defined by
89  *                    |kDampFilter|.
90  * interpol_coeff   : pointer to a set of coefficient which are used to utilize
91  *                    fractional pitch by interpolation.
92  * gain             : pitch-gain to be applied to the current segment of input.
93  * lag              : pitch-lag for the current segment of input.
94  * lag_offset       : the offset of lag w.r.t. current sample.
95  * sub_frame        : sub-frame index, there are 4 pitch sub-frames in an iSAC
96  *                    frame.
97  *                    This specifies the usage of the filter. See
98  *                    'PitchFilterOperation' for operational modes.
99  * num_samples      : number of samples to be processed in each segment.
100  * index            : index of the input and output sample.
101  * damper_state_dg  : state of damping filter for different trial gains.
102  * gain_mult        : differential changes to gain.
103  */
104 typedef struct {
105   double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD];
106   double damper_state[PITCH_DAMPORDER];
107   const double *interpol_coeff;
108   double gain;
109   double lag;
110   int lag_offset;
111 
112   int sub_frame;
113   PitchFilterOperation mode;
114   int num_samples;
115   int index;
116 
117   double damper_state_dg[4][PITCH_DAMPORDER];
118   double gain_mult[4];
119 } PitchFilterParam;
120 
121 /**********************************************************************
122  * FilterSegment()
123  * Filter one segment, a quarter of a frame.
124  *
125  * Inputs
126  *   in_data      : pointer to the input signal of 30 ms at 8 kHz sample-rate.
127  *   filter_param : pitch filter parameters.
128  *
129  * Outputs
130  *   out_data     : pointer to a buffer where the filtered signal is written to.
131  *   out_dg       : [only used in kPitchFilterPreGain] pointer to a buffer
132  *                  where the output of different gain values (differential
133  *                  change to gain) is written.
134  */
FilterSegment(const double * in_data,PitchFilterParam * parameters,double * out_data,double out_dg[][PITCH_FRAME_LEN+QLOOKAHEAD])135 static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
136                           double* out_data,
137                           double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
138   int n;
139   int m;
140   int j;
141   double sum;
142   double sum2;
143   /* Index of |parameters->buffer| where the output is written to. */
144   int pos = parameters->index + PITCH_BUFFSIZE;
145   /* Index of |parameters->buffer| where samples are read for fractional-lag
146    * computation. */
147   int pos_lag = pos - parameters->lag_offset;
148 
149   for (n = 0; n < parameters->num_samples; ++n) {
150     /* Shift low pass filter states. */
151     for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
152       parameters->damper_state[m] = parameters->damper_state[m - 1];
153     }
154     /* Filter to get fractional pitch. */
155     sum = 0.0;
156     for (m = 0; m < PITCH_FRACORDER; ++m) {
157       sum += parameters->buffer[pos_lag + m] * parameters->interpol_coeff[m];
158     }
159     /* Multiply with gain. */
160     parameters->damper_state[0] = parameters->gain * sum;
161 
162     if (parameters->mode == kPitchFilterPreGain) {
163       int lag_index = parameters->index - parameters->lag_offset;
164       int m_tmp = (lag_index < 0) ? -lag_index : 0;
165       /* Update the damper state for the new sample. */
166       for (m = PITCH_DAMPORDER - 1; m > 0; --m) {
167         for (j = 0; j < 4; ++j) {
168           parameters->damper_state_dg[j][m] =
169               parameters->damper_state_dg[j][m - 1];
170         }
171       }
172 
173       for (j = 0; j < parameters->sub_frame + 1; ++j) {
174         /* Filter for fractional pitch. */
175         sum2 = 0.0;
176         for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
177           /* |lag_index + m| is always larger than or equal to zero, see how
178            * m_tmp is computed. This is equivalent to assume samples outside
179            * |out_dg[j]| are zero. */
180           sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
181         }
182         /* Add the contribution of differential gain change. */
183         parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum +
184             parameters->gain * sum2;
185       }
186 
187       /* Filter with damping filter, and store the results. */
188       for (j = 0; j < parameters->sub_frame + 1; ++j) {
189         sum = 0.0;
190         for (m = 0; m < PITCH_DAMPORDER; ++m) {
191           sum -= parameters->damper_state_dg[j][m] * kDampFilter[m];
192         }
193         out_dg[j][parameters->index] = sum;
194       }
195     }
196     /* Filter with damping filter. */
197     sum = 0.0;
198     for (m = 0; m < PITCH_DAMPORDER; ++m) {
199       sum += parameters->damper_state[m] * kDampFilter[m];
200     }
201 
202     /* Subtract from input and update buffer. */
203     out_data[parameters->index] = in_data[parameters->index] - sum;
204     parameters->buffer[pos] = in_data[parameters->index] +
205         out_data[parameters->index];
206 
207     ++parameters->index;
208     ++pos;
209     ++pos_lag;
210   }
211   return;
212 }
213 
214 /* Update filter parameters based on the pitch-gains and pitch-lags. */
Update(PitchFilterParam * parameters)215 static void Update(PitchFilterParam* parameters) {
216   double fraction;
217   int fraction_index;
218   /* Compute integer lag-offset. */
219   parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY +
220                                             0.5);
221   /* Find correct set of coefficients for computing fractional pitch. */
222   fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
223   fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
224   parameters->interpol_coeff = kIntrpCoef[fraction_index];
225 
226   if (parameters->mode == kPitchFilterPreGain) {
227     /* If in this mode make a differential change to pitch gain. */
228     parameters->gain_mult[parameters->sub_frame] += 0.2;
229     if (parameters->gain_mult[parameters->sub_frame] > 1.0) {
230       parameters->gain_mult[parameters->sub_frame] = 1.0;
231     }
232     if (parameters->sub_frame > 0) {
233       parameters->gain_mult[parameters->sub_frame - 1] -= 0.2;
234     }
235   }
236 }
237 
238 /******************************************************************************
239  * FilterFrame()
240  * Filter a frame of 30 millisecond, given pitch-lags and pitch-gains.
241  *
242  * Inputs
243  *   in_data     : pointer to the input signal of 30 ms at 8 kHz sample-rate.
244  *   lags        : pointer to pitch-lags, 4 lags per frame.
245  *   gains       : pointer to pitch-gians, 4 gains per frame.
246  *   mode        : defining the functionality of the filter. It takes the
247  *                 following values.
248  *                 kPitchFilterPre:     Pitch pre-filter, used at encoder.
249  *                 kPitchFilterPost:    Pitch post-filter, used at decoder.
250  *                 kPitchFilterPreLa:   Pitch pre-filter with lookahead.
251  *                 kPitchFilterPreGain: Pitch pre-filter used to otain optimal
252  *                                      pitch-gains.
253  *
254  * Outputs
255  *   out_data    : pointer to a buffer where the filtered signal is written to.
256  *   out_dg      : [only used in kPitchFilterPreGain] pointer to a buffer
257  *                 where the output of different gain values (differential
258  *                 change to gain) is written.
259  */
FilterFrame(const double * in_data,PitchFiltstr * filter_state,double * lags,double * gains,PitchFilterOperation mode,double * out_data,double out_dg[][PITCH_FRAME_LEN+QLOOKAHEAD])260 static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
261                         double* lags, double* gains, PitchFilterOperation mode,
262                         double* out_data,
263                         double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
264   PitchFilterParam filter_parameters;
265   double gain_delta, lag_delta;
266   double old_lag, old_gain;
267   int n;
268   int m;
269   const double kEnhancer = 1.3;
270 
271   /* Set up buffer and states. */
272   filter_parameters.index = 0;
273   filter_parameters.lag_offset = 0;
274   filter_parameters.mode = mode;
275   /* Copy states to local variables. */
276   memcpy(filter_parameters.buffer, filter_state->ubuf,
277          sizeof(filter_state->ubuf));
278   RTC_COMPILE_ASSERT(sizeof(filter_parameters.buffer) >=
279                  sizeof(filter_state->ubuf));
280   memset(filter_parameters.buffer +
281              sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]),
282          0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf));
283   memcpy(filter_parameters.damper_state, filter_state->ystate,
284          sizeof(filter_state->ystate));
285 
286   if (mode == kPitchFilterPreGain) {
287     /* Clear buffers. */
288     memset(filter_parameters.gain_mult, 0, sizeof(filter_parameters.gain_mult));
289     memset(filter_parameters.damper_state_dg, 0,
290            sizeof(filter_parameters.damper_state_dg));
291     for (n = 0; n < PITCH_SUBFRAMES; ++n) {
292       //memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD));
293       memset(out_dg[n], 0, sizeof(out_dg[n]));
294     }
295   } else if (mode == kPitchFilterPost) {
296     /* Make output more periodic. Negative sign is to change the structure
297      * of the filter. */
298     for (n = 0; n < PITCH_SUBFRAMES; ++n) {
299       gains[n] *= -kEnhancer;
300     }
301   }
302 
303   old_lag = *filter_state->oldlagp;
304   old_gain = *filter_state->oldgainp;
305 
306   /* No interpolation if pitch lag step is big. */
307   if ((lags[0] > (PITCH_UPSTEP * old_lag)) ||
308       (lags[0] < (PITCH_DOWNSTEP * old_lag))) {
309     old_lag = lags[0];
310     old_gain = gains[0];
311 
312     if (mode == kPitchFilterPreGain) {
313       filter_parameters.gain_mult[0] = 1.0;
314     }
315   }
316 
317   filter_parameters.num_samples = PITCH_UPDATE;
318   for (m = 0; m < PITCH_SUBFRAMES; ++m) {
319     /* Set the sub-frame value. */
320     filter_parameters.sub_frame = m;
321     /* Calculate interpolation steps for pitch-lag and pitch-gain. */
322     lag_delta = (lags[m] - old_lag) / PITCH_GRAN_PER_SUBFRAME;
323     filter_parameters.lag = old_lag;
324     gain_delta = (gains[m] - old_gain) / PITCH_GRAN_PER_SUBFRAME;
325     filter_parameters.gain = old_gain;
326     /* Store for the next sub-frame. */
327     old_lag = lags[m];
328     old_gain = gains[m];
329 
330     for (n = 0; n < PITCH_GRAN_PER_SUBFRAME; ++n) {
331       /* Step-wise interpolation of pitch gains and lags. As pitch-lag changes,
332        * some parameters of filter need to be update. */
333       filter_parameters.gain += gain_delta;
334       filter_parameters.lag += lag_delta;
335       /* Update parameters according to new lag value. */
336       Update(&filter_parameters);
337       /* Filter a segment of input. */
338       FilterSegment(in_data, &filter_parameters, out_data, out_dg);
339     }
340   }
341 
342   if (mode != kPitchFilterPreGain) {
343     /* Export buffer and states. */
344     memcpy(filter_state->ubuf, &filter_parameters.buffer[PITCH_FRAME_LEN],
345            sizeof(filter_state->ubuf));
346     memcpy(filter_state->ystate, filter_parameters.damper_state,
347            sizeof(filter_state->ystate));
348 
349     /* Store for the next frame. */
350     *filter_state->oldlagp = old_lag;
351     *filter_state->oldgainp = old_gain;
352   }
353 
354   if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
355     /* Filter the lookahead segment, this is treated as the last sub-frame. So
356      * set |pf_param| to last sub-frame. */
357     filter_parameters.sub_frame = PITCH_SUBFRAMES - 1;
358     filter_parameters.num_samples = QLOOKAHEAD;
359     FilterSegment(in_data, &filter_parameters, out_data, out_dg);
360   }
361 }
362 
WebRtcIsac_PitchfilterPre(double * in_data,double * out_data,PitchFiltstr * pf_state,double * lags,double * gains)363 void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data,
364                                PitchFiltstr* pf_state, double* lags,
365                                double* gains) {
366   FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL);
367 }
368 
WebRtcIsac_PitchfilterPre_la(double * in_data,double * out_data,PitchFiltstr * pf_state,double * lags,double * gains)369 void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data,
370                                   PitchFiltstr* pf_state, double* lags,
371                                   double* gains) {
372   FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data,
373               NULL);
374 }
375 
WebRtcIsac_PitchfilterPre_gains(double * in_data,double * out_data,double out_dg[][PITCH_FRAME_LEN+QLOOKAHEAD],PitchFiltstr * pf_state,double * lags,double * gains)376 void WebRtcIsac_PitchfilterPre_gains(
377     double* in_data, double* out_data,
378     double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state,
379     double* lags, double* gains) {
380   FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data,
381               out_dg);
382 }
383 
WebRtcIsac_PitchfilterPost(double * in_data,double * out_data,PitchFiltstr * pf_state,double * lags,double * gains)384 void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data,
385                                 PitchFiltstr* pf_state, double* lags,
386                                 double* gains) {
387   FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL);
388 }
389