1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 /***********************************************************
33 * Pitch analyser function
34 ********************************************************** */
35 #include "SigProc_FIX.h"
36 #include "pitch_est_defines.h"
37 #include "stack_alloc.h"
38 #include "debug.h"
39 #include "pitch.h"
40
41 #define SCRATCH_SIZE 22
42 #define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 )
43 #define SF_LENGTH_8KHZ ( PE_SUBFR_LENGTH_MS * 8 )
44 #define MIN_LAG_4KHZ ( PE_MIN_LAG_MS * 4 )
45 #define MIN_LAG_8KHZ ( PE_MIN_LAG_MS * 8 )
46 #define MAX_LAG_4KHZ ( PE_MAX_LAG_MS * 4 )
47 #define MAX_LAG_8KHZ ( PE_MAX_LAG_MS * 8 - 1 )
48 #define CSTRIDE_4KHZ ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
49 #define CSTRIDE_8KHZ ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
50 #define D_COMP_MIN ( MIN_LAG_8KHZ - 3 )
51 #define D_COMP_MAX ( MAX_LAG_8KHZ + 4 )
52 #define D_COMP_STRIDE ( D_COMP_MAX - D_COMP_MIN )
53
54 typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
55
56 /************************************************************/
57 /* Internally used functions */
58 /************************************************************/
59 static void silk_P_Ana_calc_corr_st3(
60 silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */
61 const opus_int16 frame[], /* I vector to correlate */
62 opus_int start_lag, /* I lag offset to search around */
63 opus_int sf_length, /* I length of a 5 ms subframe */
64 opus_int nb_subfr, /* I number of subframes */
65 opus_int complexity, /* I Complexity setting */
66 int arch /* I Run-time architecture */
67 );
68
69 static void silk_P_Ana_calc_energy_st3(
70 silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */
71 const opus_int16 frame[], /* I vector to calc energy in */
72 opus_int start_lag, /* I lag offset to search around */
73 opus_int sf_length, /* I length of one 5 ms subframe */
74 opus_int nb_subfr, /* I number of subframes */
75 opus_int complexity /* I Complexity setting */
76 );
77
78 /*************************************************************/
79 /* FIXED POINT CORE PITCH ANALYSIS FUNCTION */
80 /*************************************************************/
silk_pitch_analysis_core(const opus_int16 * frame,opus_int * pitch_out,opus_int16 * lagIndex,opus_int8 * contourIndex,opus_int * LTPCorr_Q15,opus_int prevLag,const opus_int32 search_thres1_Q16,const opus_int search_thres2_Q13,const opus_int Fs_kHz,const opus_int complexity,const opus_int nb_subfr,int arch)81 opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
82 const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
83 opus_int *pitch_out, /* O 4 pitch lag values */
84 opus_int16 *lagIndex, /* O Lag Index */
85 opus_int8 *contourIndex, /* O Pitch contour Index */
86 opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
87 opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */
88 const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
89 const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
90 const opus_int Fs_kHz, /* I Sample frequency (kHz) */
91 const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
92 const opus_int nb_subfr, /* I number of 5 ms subframes */
93 int arch /* I Run-time architecture */
94 )
95 {
96 VARDECL( opus_int16, frame_8kHz );
97 VARDECL( opus_int16, frame_4kHz );
98 opus_int32 filt_state[ 6 ];
99 const opus_int16 *input_frame_ptr;
100 opus_int i, k, d, j;
101 VARDECL( opus_int16, C );
102 VARDECL( opus_int32, xcorr32 );
103 const opus_int16 *target_ptr, *basis_ptr;
104 opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
105 opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
106 VARDECL( opus_int16, d_comp );
107 opus_int32 sum, threshold, lag_counter;
108 opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
109 opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
110 VARDECL( silk_pe_stage3_vals, energies_st3 );
111 VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
112 opus_int frame_length, frame_length_8kHz, frame_length_4kHz;
113 opus_int sf_length;
114 opus_int min_lag;
115 opus_int max_lag;
116 opus_int32 contour_bias_Q15, diff;
117 opus_int nb_cbk_search, cbk_size;
118 opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
119 const opus_int8 *Lag_CB_ptr;
120 SAVE_STACK;
121 /* Check for valid sampling frequency */
122 silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
123
124 /* Check for valid complexity setting */
125 silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
126 silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
127
128 silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
129 silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
130
131 /* Set up frame lengths max / min lag for the sampling frequency */
132 frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
133 frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
134 frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
135 sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz;
136 min_lag = PE_MIN_LAG_MS * Fs_kHz;
137 max_lag = PE_MAX_LAG_MS * Fs_kHz - 1;
138
139 /* Resample from input sampled at Fs_kHz to 8 kHz */
140 ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
141 if( Fs_kHz == 16 ) {
142 silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
143 silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
144 } else if( Fs_kHz == 12 ) {
145 silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
146 silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
147 } else {
148 silk_assert( Fs_kHz == 8 );
149 silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
150 }
151
152 /* Decimate again to 4 kHz */
153 silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
154 ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
155 silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
156
157 /* Low-pass filter */
158 for( i = frame_length_4kHz - 1; i > 0; i-- ) {
159 frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
160 }
161
162 /*******************************************************************************
163 ** Scale 4 kHz signal down to prevent correlations measures from overflowing
164 ** find scaling as max scaling for each 8kHz(?) subframe
165 *******************************************************************************/
166
167 /* Inner product is calculated with different lengths, so scale for the worst case */
168 silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
169 if( shift > 0 ) {
170 shift = silk_RSHIFT( shift, 1 );
171 for( i = 0; i < frame_length_4kHz; i++ ) {
172 frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
173 }
174 }
175
176 /******************************************************************************
177 * FIRST STAGE, operating in 4 khz
178 ******************************************************************************/
179 ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
180 ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
181 silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
182 target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
183 for( k = 0; k < nb_subfr >> 1; k++ ) {
184 /* Check that we are within range of the array */
185 silk_assert( target_ptr >= frame_4kHz );
186 silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
187
188 basis_ptr = target_ptr - MIN_LAG_4KHZ;
189
190 /* Check that we are within range of the array */
191 silk_assert( basis_ptr >= frame_4kHz );
192 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
193
194 celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
195
196 /* Calculate first vector products before loop */
197 cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
198 normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
199 normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) );
200 normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
201
202 matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
203 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
204
205 /* From now on normalizer is computed recursively */
206 for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
207 basis_ptr--;
208
209 /* Check that we are within range of the array */
210 silk_assert( basis_ptr >= frame_4kHz );
211 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
212
213 cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
214
215 /* Add contribution of new sample and remove contribution from oldest sample */
216 normalizer = silk_ADD32( normalizer,
217 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
218 silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
219
220 matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
221 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
222 }
223 /* Update target pointer */
224 target_ptr += SF_LENGTH_8KHZ;
225 }
226
227 /* Combine two subframes into single correlation measure and apply short-lag bias */
228 if( nb_subfr == PE_MAX_NB_SUBFR ) {
229 for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
230 sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
231 + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ); /* Q14 */
232 sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
233 C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
234 }
235 } else {
236 /* Only short-lag bias */
237 for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
238 sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 ); /* Q14 */
239 sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
240 C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
241 }
242 }
243
244 /* Sort */
245 length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
246 silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
247 silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
248 length_d_srch );
249
250 /* Escape if correlation is very low already here */
251 Cmax = (opus_int)C[ 0 ]; /* Q14 */
252 if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
253 silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
254 *LTPCorr_Q15 = 0;
255 *lagIndex = 0;
256 *contourIndex = 0;
257 RESTORE_STACK;
258 return 1;
259 }
260
261 threshold = silk_SMULWB( search_thres1_Q16, Cmax );
262 for( i = 0; i < length_d_srch; i++ ) {
263 /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
264 if( C[ i ] > threshold ) {
265 d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
266 } else {
267 length_d_srch = i;
268 break;
269 }
270 }
271 silk_assert( length_d_srch > 0 );
272
273 ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
274 for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
275 d_comp[ i - D_COMP_MIN ] = 0;
276 }
277 for( i = 0; i < length_d_srch; i++ ) {
278 d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
279 }
280
281 /* Convolution */
282 for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
283 d_comp[ i - D_COMP_MIN ] +=
284 d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
285 }
286
287 length_d_srch = 0;
288 for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
289 if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
290 d_srch[ length_d_srch ] = i;
291 length_d_srch++;
292 }
293 }
294
295 /* Convolution */
296 for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
297 d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
298 + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
299 }
300
301 length_d_comp = 0;
302 for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
303 if( d_comp[ i - D_COMP_MIN ] > 0 ) {
304 d_comp[ length_d_comp ] = i - 2;
305 length_d_comp++;
306 }
307 }
308
309 /**********************************************************************************
310 ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
311 *************************************************************************************/
312
313 /******************************************************************************
314 ** Scale signal down to avoid correlations measures from overflowing
315 *******************************************************************************/
316 /* find scaling as max scaling for each subframe */
317 silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
318 if( shift > 0 ) {
319 shift = silk_RSHIFT( shift, 1 );
320 for( i = 0; i < frame_length_8kHz; i++ ) {
321 frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
322 }
323 }
324
325 /*********************************************************************************
326 * Find energy of each subframe projected onto its history, for a range of delays
327 *********************************************************************************/
328 silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
329
330 target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
331 for( k = 0; k < nb_subfr; k++ ) {
332
333 /* Check that we are within range of the array */
334 silk_assert( target_ptr >= frame_8kHz );
335 silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
336
337 energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ ), 1 );
338 for( j = 0; j < length_d_comp; j++ ) {
339 d = d_comp[ j ];
340 basis_ptr = target_ptr - d;
341
342 /* Check that we are within range of the array */
343 silk_assert( basis_ptr >= frame_8kHz );
344 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
345
346 cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
347 if( cross_corr > 0 ) {
348 energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ );
349 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
350 (opus_int16)silk_DIV32_varQ( cross_corr,
351 silk_ADD32( energy_target,
352 energy_basis ),
353 13 + 1 ); /* Q13 */
354 } else {
355 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
356 }
357 }
358 target_ptr += SF_LENGTH_8KHZ;
359 }
360
361 /* search over lag range and lags codebook */
362 /* scale factor for lag codebook, as a function of center lag */
363
364 CCmax = silk_int32_MIN;
365 CCmax_b = silk_int32_MIN;
366
367 CBimax = 0; /* To avoid returning undefined lag values */
368 lag = -1; /* To check if lag with strong enough correlation has been found */
369
370 if( prevLag > 0 ) {
371 if( Fs_kHz == 12 ) {
372 prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
373 } else if( Fs_kHz == 16 ) {
374 prevLag = silk_RSHIFT( prevLag, 1 );
375 }
376 prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
377 } else {
378 prevLag_log2_Q7 = 0;
379 }
380 silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
381 /* Set up stage 2 codebook based on number of subframes */
382 if( nb_subfr == PE_MAX_NB_SUBFR ) {
383 cbk_size = PE_NB_CBKS_STAGE2_EXT;
384 Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
385 if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
386 /* If input is 8 khz use a larger codebook here because it is last stage */
387 nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
388 } else {
389 nb_cbk_search = PE_NB_CBKS_STAGE2;
390 }
391 } else {
392 cbk_size = PE_NB_CBKS_STAGE2_10MS;
393 Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
394 nb_cbk_search = PE_NB_CBKS_STAGE2_10MS;
395 }
396
397 for( k = 0; k < length_d_srch; k++ ) {
398 d = d_srch[ k ];
399 for( j = 0; j < nb_cbk_search; j++ ) {
400 CC[ j ] = 0;
401 for( i = 0; i < nb_subfr; i++ ) {
402 opus_int d_subfr;
403 /* Try all codebooks */
404 d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
405 CC[ j ] = CC[ j ]
406 + (opus_int32)matrix_ptr( C, i,
407 d_subfr - ( MIN_LAG_8KHZ - 2 ),
408 CSTRIDE_8KHZ );
409 }
410 }
411 /* Find best codebook */
412 CCmax_new = silk_int32_MIN;
413 CBimax_new = 0;
414 for( i = 0; i < nb_cbk_search; i++ ) {
415 if( CC[ i ] > CCmax_new ) {
416 CCmax_new = CC[ i ];
417 CBimax_new = i;
418 }
419 }
420
421 /* Bias towards shorter lags */
422 lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
423 silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
424 silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
425 CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
426
427 /* Bias towards previous lag */
428 silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
429 if( prevLag > 0 ) {
430 delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
431 silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
432 delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
433 prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
434 prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
435 CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
436 }
437
438 if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
439 CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */
440 silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ /* Lag must be in range */
441 ) {
442 CCmax_b = CCmax_new_b;
443 CCmax = CCmax_new;
444 lag = d;
445 CBimax = CBimax_new;
446 }
447 }
448
449 if( lag == -1 ) {
450 /* No suitable candidate found */
451 silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
452 *LTPCorr_Q15 = 0;
453 *lagIndex = 0;
454 *contourIndex = 0;
455 RESTORE_STACK;
456 return 1;
457 }
458
459 /* Output normalized correlation */
460 *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
461 silk_assert( *LTPCorr_Q15 >= 0 );
462
463 if( Fs_kHz > 8 ) {
464 VARDECL( opus_int16, scratch_mem );
465 /***************************************************************************/
466 /* Scale input signal down to avoid correlations measures from overflowing */
467 /***************************************************************************/
468 /* find scaling as max scaling for each subframe */
469 silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
470 ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
471 if( shift > 0 ) {
472 /* Move signal to scratch mem because the input signal should be unchanged */
473 shift = silk_RSHIFT( shift, 1 );
474 for( i = 0; i < frame_length; i++ ) {
475 scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
476 }
477 input_frame_ptr = scratch_mem;
478 } else {
479 input_frame_ptr = frame;
480 }
481
482 /* Search in original signal */
483
484 CBimax_old = CBimax;
485 /* Compensate for decimation */
486 silk_assert( lag == silk_SAT16( lag ) );
487 if( Fs_kHz == 12 ) {
488 lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
489 } else if( Fs_kHz == 16 ) {
490 lag = silk_LSHIFT( lag, 1 );
491 } else {
492 lag = silk_SMULBB( lag, 3 );
493 }
494
495 lag = silk_LIMIT_int( lag, min_lag, max_lag );
496 start_lag = silk_max_int( lag - 2, min_lag );
497 end_lag = silk_min_int( lag + 2, max_lag );
498 lag_new = lag; /* to avoid undefined lag */
499 CBimax = 0; /* to avoid undefined lag */
500
501 CCmax = silk_int32_MIN;
502 /* pitch lags according to second stage */
503 for( k = 0; k < nb_subfr; k++ ) {
504 pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
505 }
506
507 /* Set up codebook parameters according to complexity setting and frame length */
508 if( nb_subfr == PE_MAX_NB_SUBFR ) {
509 nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
510 cbk_size = PE_NB_CBKS_STAGE3_MAX;
511 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
512 } else {
513 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
514 cbk_size = PE_NB_CBKS_STAGE3_10MS;
515 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
516 }
517
518 /* Calculate the correlations and energies needed in stage 3 */
519 ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
520 ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
521 silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
522 silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity );
523
524 lag_counter = 0;
525 silk_assert( lag == silk_SAT16( lag ) );
526 contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
527
528 target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
529 energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );
530 for( d = start_lag; d <= end_lag; d++ ) {
531 for( j = 0; j < nb_cbk_search; j++ ) {
532 cross_corr = 0;
533 energy = energy_target;
534 for( k = 0; k < nb_subfr; k++ ) {
535 cross_corr = silk_ADD32( cross_corr,
536 matrix_ptr( cross_corr_st3, k, j,
537 nb_cbk_search )[ lag_counter ] );
538 energy = silk_ADD32( energy,
539 matrix_ptr( energies_st3, k, j,
540 nb_cbk_search )[ lag_counter ] );
541 silk_assert( energy >= 0 );
542 }
543 if( cross_corr > 0 ) {
544 CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 ); /* Q13 */
545 /* Reduce depending on flatness of contour */
546 diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j ); /* Q15 */
547 silk_assert( diff == silk_SAT16( diff ) );
548 CCmax_new = silk_SMULWB( CCmax_new, diff ); /* Q14 */
549 } else {
550 CCmax_new = 0;
551 }
552
553 if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
554 CCmax = CCmax_new;
555 lag_new = d;
556 CBimax = j;
557 }
558 }
559 lag_counter++;
560 }
561
562 for( k = 0; k < nb_subfr; k++ ) {
563 pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
564 pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
565 }
566 *lagIndex = (opus_int16)( lag_new - min_lag);
567 *contourIndex = (opus_int8)CBimax;
568 } else { /* Fs_kHz == 8 */
569 /* Save Lags */
570 for( k = 0; k < nb_subfr; k++ ) {
571 pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
572 pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
573 }
574 *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
575 *contourIndex = (opus_int8)CBimax;
576 }
577 silk_assert( *lagIndex >= 0 );
578 /* return as voiced */
579 RESTORE_STACK;
580 return 0;
581 }
582
583 /***********************************************************************
584 * Calculates the correlations used in stage 3 search. In order to cover
585 * the whole lag codebook for all the searched offset lags (lag +- 2),
586 * the following correlations are needed in each sub frame:
587 *
588 * sf1: lag range [-8,...,7] total 16 correlations
589 * sf2: lag range [-4,...,4] total 9 correlations
590 * sf3: lag range [-3,....4] total 8 correltions
591 * sf4: lag range [-6,....8] total 15 correlations
592 *
593 * In total 48 correlations. The direct implementation computed in worst
594 * case 4*12*5 = 240 correlations, but more likely around 120.
595 ***********************************************************************/
silk_P_Ana_calc_corr_st3(silk_pe_stage3_vals cross_corr_st3[],const opus_int16 frame[],opus_int start_lag,opus_int sf_length,opus_int nb_subfr,opus_int complexity,int arch)596 static void silk_P_Ana_calc_corr_st3(
597 silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */
598 const opus_int16 frame[], /* I vector to correlate */
599 opus_int start_lag, /* I lag offset to search around */
600 opus_int sf_length, /* I length of a 5 ms subframe */
601 opus_int nb_subfr, /* I number of subframes */
602 opus_int complexity, /* I Complexity setting */
603 int arch /* I Run-time architecture */
604 )
605 {
606 const opus_int16 *target_ptr;
607 opus_int i, j, k, lag_counter, lag_low, lag_high;
608 opus_int nb_cbk_search, delta, idx, cbk_size;
609 VARDECL( opus_int32, scratch_mem );
610 VARDECL( opus_int32, xcorr32 );
611 const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
612 SAVE_STACK;
613
614 silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
615 silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
616
617 if( nb_subfr == PE_MAX_NB_SUBFR ) {
618 Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
619 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
620 nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
621 cbk_size = PE_NB_CBKS_STAGE3_MAX;
622 } else {
623 silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
624 Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
625 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
626 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
627 cbk_size = PE_NB_CBKS_STAGE3_10MS;
628 }
629 ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
630 ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
631
632 target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
633 for( k = 0; k < nb_subfr; k++ ) {
634 lag_counter = 0;
635
636 /* Calculate the correlations for each subframe */
637 lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 );
638 lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
639 silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
640 celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
641 for( j = lag_low; j <= lag_high; j++ ) {
642 silk_assert( lag_counter < SCRATCH_SIZE );
643 scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
644 lag_counter++;
645 }
646
647 delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
648 for( i = 0; i < nb_cbk_search; i++ ) {
649 /* Fill out the 3 dim array that stores the correlations for */
650 /* each code_book vector for each start lag */
651 idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
652 for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
653 silk_assert( idx + j < SCRATCH_SIZE );
654 silk_assert( idx + j < lag_counter );
655 matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
656 scratch_mem[ idx + j ];
657 }
658 }
659 target_ptr += sf_length;
660 }
661 RESTORE_STACK;
662 }
663
664 /********************************************************************/
665 /* Calculate the energies for first two subframes. The energies are */
666 /* calculated recursively. */
667 /********************************************************************/
silk_P_Ana_calc_energy_st3(silk_pe_stage3_vals energies_st3[],const opus_int16 frame[],opus_int start_lag,opus_int sf_length,opus_int nb_subfr,opus_int complexity)668 static void silk_P_Ana_calc_energy_st3(
669 silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */
670 const opus_int16 frame[], /* I vector to calc energy in */
671 opus_int start_lag, /* I lag offset to search around */
672 opus_int sf_length, /* I length of one 5 ms subframe */
673 opus_int nb_subfr, /* I number of subframes */
674 opus_int complexity /* I Complexity setting */
675 )
676 {
677 const opus_int16 *target_ptr, *basis_ptr;
678 opus_int32 energy;
679 opus_int k, i, j, lag_counter;
680 opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff;
681 VARDECL( opus_int32, scratch_mem );
682 const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
683 SAVE_STACK;
684
685 silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
686 silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
687
688 if( nb_subfr == PE_MAX_NB_SUBFR ) {
689 Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
690 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
691 nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
692 cbk_size = PE_NB_CBKS_STAGE3_MAX;
693 } else {
694 silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
695 Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
696 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
697 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
698 cbk_size = PE_NB_CBKS_STAGE3_10MS;
699 }
700 ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
701
702 target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
703 for( k = 0; k < nb_subfr; k++ ) {
704 lag_counter = 0;
705
706 /* Calculate the energy for first lag */
707 basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
708 energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length );
709 silk_assert( energy >= 0 );
710 scratch_mem[ lag_counter ] = energy;
711 lag_counter++;
712
713 lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
714 for( i = 1; i < lag_diff; i++ ) {
715 /* remove part outside new window */
716 energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
717 silk_assert( energy >= 0 );
718
719 /* add part that comes into window */
720 energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
721 silk_assert( energy >= 0 );
722 silk_assert( lag_counter < SCRATCH_SIZE );
723 scratch_mem[ lag_counter ] = energy;
724 lag_counter++;
725 }
726
727 delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
728 for( i = 0; i < nb_cbk_search; i++ ) {
729 /* Fill out the 3 dim array that stores the correlations for */
730 /* each code_book vector for each start lag */
731 idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
732 for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
733 silk_assert( idx + j < SCRATCH_SIZE );
734 silk_assert( idx + j < lag_counter );
735 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
736 scratch_mem[ idx + j ];
737 silk_assert(
738 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
739 }
740 }
741 target_ptr += sf_length;
742 }
743 RESTORE_STACK;
744 }
745