1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 /***********************************************************
33 * Pitch analyser function
34 ********************************************************** */
35 #include "SigProc_FIX.h"
36 #include "pitch_est_defines.h"
37 #include "stack_alloc.h"
38 #include "debug.h"
39 #include "pitch.h"
40
41 #define SCRATCH_SIZE 22
42 #define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 )
43 #define SF_LENGTH_8KHZ ( PE_SUBFR_LENGTH_MS * 8 )
44 #define MIN_LAG_4KHZ ( PE_MIN_LAG_MS * 4 )
45 #define MIN_LAG_8KHZ ( PE_MIN_LAG_MS * 8 )
46 #define MAX_LAG_4KHZ ( PE_MAX_LAG_MS * 4 )
47 #define MAX_LAG_8KHZ ( PE_MAX_LAG_MS * 8 - 1 )
48 #define CSTRIDE_4KHZ ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
49 #define CSTRIDE_8KHZ ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
50 #define D_COMP_MIN ( MIN_LAG_8KHZ - 3 )
51 #define D_COMP_MAX ( MAX_LAG_8KHZ + 4 )
52 #define D_COMP_STRIDE ( D_COMP_MAX - D_COMP_MIN )
53
54 typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
55
56 /************************************************************/
57 /* Internally used functions */
58 /************************************************************/
59 static void silk_P_Ana_calc_corr_st3(
60 silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */
61 const opus_int16 frame[], /* I vector to correlate */
62 opus_int start_lag, /* I lag offset to search around */
63 opus_int sf_length, /* I length of a 5 ms subframe */
64 opus_int nb_subfr, /* I number of subframes */
65 opus_int complexity, /* I Complexity setting */
66 int arch /* I Run-time architecture */
67 );
68
69 static void silk_P_Ana_calc_energy_st3(
70 silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */
71 const opus_int16 frame[], /* I vector to calc energy in */
72 opus_int start_lag, /* I lag offset to search around */
73 opus_int sf_length, /* I length of one 5 ms subframe */
74 opus_int nb_subfr, /* I number of subframes */
75 opus_int complexity, /* I Complexity setting */
76 int arch /* I Run-time architecture */
77 );
78
79 /*************************************************************/
80 /* FIXED POINT CORE PITCH ANALYSIS FUNCTION */
81 /*************************************************************/
silk_pitch_analysis_core(const opus_int16 * frame,opus_int * pitch_out,opus_int16 * lagIndex,opus_int8 * contourIndex,opus_int * LTPCorr_Q15,opus_int prevLag,const opus_int32 search_thres1_Q16,const opus_int search_thres2_Q13,const opus_int Fs_kHz,const opus_int complexity,const opus_int nb_subfr,int arch)82 opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
83 const opus_int16 *frame, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
84 opus_int *pitch_out, /* O 4 pitch lag values */
85 opus_int16 *lagIndex, /* O Lag Index */
86 opus_int8 *contourIndex, /* O Pitch contour Index */
87 opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
88 opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */
89 const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
90 const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
91 const opus_int Fs_kHz, /* I Sample frequency (kHz) */
92 const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
93 const opus_int nb_subfr, /* I number of 5 ms subframes */
94 int arch /* I Run-time architecture */
95 )
96 {
97 VARDECL( opus_int16, frame_8kHz );
98 VARDECL( opus_int16, frame_4kHz );
99 opus_int32 filt_state[ 6 ];
100 const opus_int16 *input_frame_ptr;
101 opus_int i, k, d, j;
102 VARDECL( opus_int16, C );
103 VARDECL( opus_int32, xcorr32 );
104 const opus_int16 *target_ptr, *basis_ptr;
105 opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
106 opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
107 VARDECL( opus_int16, d_comp );
108 opus_int32 sum, threshold, lag_counter;
109 opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
110 opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
111 VARDECL( silk_pe_stage3_vals, energies_st3 );
112 VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
113 opus_int frame_length, frame_length_8kHz, frame_length_4kHz;
114 opus_int sf_length;
115 opus_int min_lag;
116 opus_int max_lag;
117 opus_int32 contour_bias_Q15, diff;
118 opus_int nb_cbk_search, cbk_size;
119 opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
120 const opus_int8 *Lag_CB_ptr;
121 SAVE_STACK;
122 /* Check for valid sampling frequency */
123 silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
124
125 /* Check for valid complexity setting */
126 silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
127 silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
128
129 silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
130 silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
131
132 /* Set up frame lengths max / min lag for the sampling frequency */
133 frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
134 frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
135 frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
136 sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz;
137 min_lag = PE_MIN_LAG_MS * Fs_kHz;
138 max_lag = PE_MAX_LAG_MS * Fs_kHz - 1;
139
140 /* Resample from input sampled at Fs_kHz to 8 kHz */
141 ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
142 if( Fs_kHz == 16 ) {
143 silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
144 silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
145 } else if( Fs_kHz == 12 ) {
146 silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
147 silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
148 } else {
149 silk_assert( Fs_kHz == 8 );
150 silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
151 }
152
153 /* Decimate again to 4 kHz */
154 silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
155 ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
156 silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
157
158 /* Low-pass filter */
159 for( i = frame_length_4kHz - 1; i > 0; i-- ) {
160 frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
161 }
162
163 /*******************************************************************************
164 ** Scale 4 kHz signal down to prevent correlations measures from overflowing
165 ** find scaling as max scaling for each 8kHz(?) subframe
166 *******************************************************************************/
167
168 /* Inner product is calculated with different lengths, so scale for the worst case */
169 silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
170 if( shift > 0 ) {
171 shift = silk_RSHIFT( shift, 1 );
172 for( i = 0; i < frame_length_4kHz; i++ ) {
173 frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
174 }
175 }
176
177 /******************************************************************************
178 * FIRST STAGE, operating in 4 khz
179 ******************************************************************************/
180 ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
181 ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
182 silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
183 target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
184 for( k = 0; k < nb_subfr >> 1; k++ ) {
185 /* Check that we are within range of the array */
186 silk_assert( target_ptr >= frame_4kHz );
187 silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
188
189 basis_ptr = target_ptr - MIN_LAG_4KHZ;
190
191 /* Check that we are within range of the array */
192 silk_assert( basis_ptr >= frame_4kHz );
193 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
194
195 celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
196
197 /* Calculate first vector products before loop */
198 cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
199 normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch );
200 normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ) );
201 normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
202
203 matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
204 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
205
206 /* From now on normalizer is computed recursively */
207 for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
208 basis_ptr--;
209
210 /* Check that we are within range of the array */
211 silk_assert( basis_ptr >= frame_4kHz );
212 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
213
214 cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
215
216 /* Add contribution of new sample and remove contribution from oldest sample */
217 normalizer = silk_ADD32( normalizer,
218 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
219 silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
220
221 matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
222 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
223 }
224 /* Update target pointer */
225 target_ptr += SF_LENGTH_8KHZ;
226 }
227
228 /* Combine two subframes into single correlation measure and apply short-lag bias */
229 if( nb_subfr == PE_MAX_NB_SUBFR ) {
230 for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
231 sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
232 + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ); /* Q14 */
233 sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
234 C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
235 }
236 } else {
237 /* Only short-lag bias */
238 for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
239 sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 ); /* Q14 */
240 sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
241 C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
242 }
243 }
244
245 /* Sort */
246 length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
247 silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
248 silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
249 length_d_srch );
250
251 /* Escape if correlation is very low already here */
252 Cmax = (opus_int)C[ 0 ]; /* Q14 */
253 if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
254 silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
255 *LTPCorr_Q15 = 0;
256 *lagIndex = 0;
257 *contourIndex = 0;
258 RESTORE_STACK;
259 return 1;
260 }
261
262 threshold = silk_SMULWB( search_thres1_Q16, Cmax );
263 for( i = 0; i < length_d_srch; i++ ) {
264 /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
265 if( C[ i ] > threshold ) {
266 d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
267 } else {
268 length_d_srch = i;
269 break;
270 }
271 }
272 silk_assert( length_d_srch > 0 );
273
274 ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
275 for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
276 d_comp[ i - D_COMP_MIN ] = 0;
277 }
278 for( i = 0; i < length_d_srch; i++ ) {
279 d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
280 }
281
282 /* Convolution */
283 for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
284 d_comp[ i - D_COMP_MIN ] +=
285 d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
286 }
287
288 length_d_srch = 0;
289 for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
290 if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
291 d_srch[ length_d_srch ] = i;
292 length_d_srch++;
293 }
294 }
295
296 /* Convolution */
297 for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
298 d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
299 + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
300 }
301
302 length_d_comp = 0;
303 for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
304 if( d_comp[ i - D_COMP_MIN ] > 0 ) {
305 d_comp[ length_d_comp ] = i - 2;
306 length_d_comp++;
307 }
308 }
309
310 /**********************************************************************************
311 ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
312 *************************************************************************************/
313
314 /******************************************************************************
315 ** Scale signal down to avoid correlations measures from overflowing
316 *******************************************************************************/
317 /* find scaling as max scaling for each subframe */
318 silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
319 if( shift > 0 ) {
320 shift = silk_RSHIFT( shift, 1 );
321 for( i = 0; i < frame_length_8kHz; i++ ) {
322 frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
323 }
324 }
325
326 /*********************************************************************************
327 * Find energy of each subframe projected onto its history, for a range of delays
328 *********************************************************************************/
329 silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
330
331 target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
332 for( k = 0; k < nb_subfr; k++ ) {
333
334 /* Check that we are within range of the array */
335 silk_assert( target_ptr >= frame_8kHz );
336 silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
337
338 energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 );
339 for( j = 0; j < length_d_comp; j++ ) {
340 d = d_comp[ j ];
341 basis_ptr = target_ptr - d;
342
343 /* Check that we are within range of the array */
344 silk_assert( basis_ptr >= frame_8kHz );
345 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
346
347 cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
348 if( cross_corr > 0 ) {
349 energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
350 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
351 (opus_int16)silk_DIV32_varQ( cross_corr,
352 silk_ADD32( energy_target,
353 energy_basis ),
354 13 + 1 ); /* Q13 */
355 } else {
356 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
357 }
358 }
359 target_ptr += SF_LENGTH_8KHZ;
360 }
361
362 /* search over lag range and lags codebook */
363 /* scale factor for lag codebook, as a function of center lag */
364
365 CCmax = silk_int32_MIN;
366 CCmax_b = silk_int32_MIN;
367
368 CBimax = 0; /* To avoid returning undefined lag values */
369 lag = -1; /* To check if lag with strong enough correlation has been found */
370
371 if( prevLag > 0 ) {
372 if( Fs_kHz == 12 ) {
373 prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
374 } else if( Fs_kHz == 16 ) {
375 prevLag = silk_RSHIFT( prevLag, 1 );
376 }
377 prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
378 } else {
379 prevLag_log2_Q7 = 0;
380 }
381 silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
382 /* Set up stage 2 codebook based on number of subframes */
383 if( nb_subfr == PE_MAX_NB_SUBFR ) {
384 cbk_size = PE_NB_CBKS_STAGE2_EXT;
385 Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
386 if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
387 /* If input is 8 khz use a larger codebook here because it is last stage */
388 nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
389 } else {
390 nb_cbk_search = PE_NB_CBKS_STAGE2;
391 }
392 } else {
393 cbk_size = PE_NB_CBKS_STAGE2_10MS;
394 Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
395 nb_cbk_search = PE_NB_CBKS_STAGE2_10MS;
396 }
397
398 for( k = 0; k < length_d_srch; k++ ) {
399 d = d_srch[ k ];
400 for( j = 0; j < nb_cbk_search; j++ ) {
401 CC[ j ] = 0;
402 for( i = 0; i < nb_subfr; i++ ) {
403 opus_int d_subfr;
404 /* Try all codebooks */
405 d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
406 CC[ j ] = CC[ j ]
407 + (opus_int32)matrix_ptr( C, i,
408 d_subfr - ( MIN_LAG_8KHZ - 2 ),
409 CSTRIDE_8KHZ );
410 }
411 }
412 /* Find best codebook */
413 CCmax_new = silk_int32_MIN;
414 CBimax_new = 0;
415 for( i = 0; i < nb_cbk_search; i++ ) {
416 if( CC[ i ] > CCmax_new ) {
417 CCmax_new = CC[ i ];
418 CBimax_new = i;
419 }
420 }
421
422 /* Bias towards shorter lags */
423 lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
424 silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
425 silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
426 CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
427
428 /* Bias towards previous lag */
429 silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
430 if( prevLag > 0 ) {
431 delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
432 silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
433 delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
434 prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
435 prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
436 CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
437 }
438
439 if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
440 CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */
441 silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ /* Lag must be in range */
442 ) {
443 CCmax_b = CCmax_new_b;
444 CCmax = CCmax_new;
445 lag = d;
446 CBimax = CBimax_new;
447 }
448 }
449
450 if( lag == -1 ) {
451 /* No suitable candidate found */
452 silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
453 *LTPCorr_Q15 = 0;
454 *lagIndex = 0;
455 *contourIndex = 0;
456 RESTORE_STACK;
457 return 1;
458 }
459
460 /* Output normalized correlation */
461 *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
462 silk_assert( *LTPCorr_Q15 >= 0 );
463
464 if( Fs_kHz > 8 ) {
465 VARDECL( opus_int16, scratch_mem );
466 /***************************************************************************/
467 /* Scale input signal down to avoid correlations measures from overflowing */
468 /***************************************************************************/
469 /* find scaling as max scaling for each subframe */
470 silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
471 ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
472 if( shift > 0 ) {
473 /* Move signal to scratch mem because the input signal should be unchanged */
474 shift = silk_RSHIFT( shift, 1 );
475 for( i = 0; i < frame_length; i++ ) {
476 scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
477 }
478 input_frame_ptr = scratch_mem;
479 } else {
480 input_frame_ptr = frame;
481 }
482
483 /* Search in original signal */
484
485 CBimax_old = CBimax;
486 /* Compensate for decimation */
487 silk_assert( lag == silk_SAT16( lag ) );
488 if( Fs_kHz == 12 ) {
489 lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
490 } else if( Fs_kHz == 16 ) {
491 lag = silk_LSHIFT( lag, 1 );
492 } else {
493 lag = silk_SMULBB( lag, 3 );
494 }
495
496 lag = silk_LIMIT_int( lag, min_lag, max_lag );
497 start_lag = silk_max_int( lag - 2, min_lag );
498 end_lag = silk_min_int( lag + 2, max_lag );
499 lag_new = lag; /* to avoid undefined lag */
500 CBimax = 0; /* to avoid undefined lag */
501
502 CCmax = silk_int32_MIN;
503 /* pitch lags according to second stage */
504 for( k = 0; k < nb_subfr; k++ ) {
505 pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
506 }
507
508 /* Set up codebook parameters according to complexity setting and frame length */
509 if( nb_subfr == PE_MAX_NB_SUBFR ) {
510 nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
511 cbk_size = PE_NB_CBKS_STAGE3_MAX;
512 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
513 } else {
514 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
515 cbk_size = PE_NB_CBKS_STAGE3_10MS;
516 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
517 }
518
519 /* Calculate the correlations and energies needed in stage 3 */
520 ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
521 ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
522 silk_P_Ana_calc_corr_st3( cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
523 silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
524
525 lag_counter = 0;
526 silk_assert( lag == silk_SAT16( lag ) );
527 contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
528
529 target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
530 energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 );
531 for( d = start_lag; d <= end_lag; d++ ) {
532 for( j = 0; j < nb_cbk_search; j++ ) {
533 cross_corr = 0;
534 energy = energy_target;
535 for( k = 0; k < nb_subfr; k++ ) {
536 cross_corr = silk_ADD32( cross_corr,
537 matrix_ptr( cross_corr_st3, k, j,
538 nb_cbk_search )[ lag_counter ] );
539 energy = silk_ADD32( energy,
540 matrix_ptr( energies_st3, k, j,
541 nb_cbk_search )[ lag_counter ] );
542 silk_assert( energy >= 0 );
543 }
544 if( cross_corr > 0 ) {
545 CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 ); /* Q13 */
546 /* Reduce depending on flatness of contour */
547 diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j ); /* Q15 */
548 silk_assert( diff == silk_SAT16( diff ) );
549 CCmax_new = silk_SMULWB( CCmax_new, diff ); /* Q14 */
550 } else {
551 CCmax_new = 0;
552 }
553
554 if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
555 CCmax = CCmax_new;
556 lag_new = d;
557 CBimax = j;
558 }
559 }
560 lag_counter++;
561 }
562
563 for( k = 0; k < nb_subfr; k++ ) {
564 pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
565 pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
566 }
567 *lagIndex = (opus_int16)( lag_new - min_lag);
568 *contourIndex = (opus_int8)CBimax;
569 } else { /* Fs_kHz == 8 */
570 /* Save Lags */
571 for( k = 0; k < nb_subfr; k++ ) {
572 pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
573 pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
574 }
575 *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
576 *contourIndex = (opus_int8)CBimax;
577 }
578 silk_assert( *lagIndex >= 0 );
579 /* return as voiced */
580 RESTORE_STACK;
581 return 0;
582 }
583
584 /***********************************************************************
585 * Calculates the correlations used in stage 3 search. In order to cover
586 * the whole lag codebook for all the searched offset lags (lag +- 2),
587 * the following correlations are needed in each sub frame:
588 *
589 * sf1: lag range [-8,...,7] total 16 correlations
590 * sf2: lag range [-4,...,4] total 9 correlations
591 * sf3: lag range [-3,....4] total 8 correltions
592 * sf4: lag range [-6,....8] total 15 correlations
593 *
594 * In total 48 correlations. The direct implementation computed in worst
595 * case 4*12*5 = 240 correlations, but more likely around 120.
596 ***********************************************************************/
silk_P_Ana_calc_corr_st3(silk_pe_stage3_vals cross_corr_st3[],const opus_int16 frame[],opus_int start_lag,opus_int sf_length,opus_int nb_subfr,opus_int complexity,int arch)597 static void silk_P_Ana_calc_corr_st3(
598 silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */
599 const opus_int16 frame[], /* I vector to correlate */
600 opus_int start_lag, /* I lag offset to search around */
601 opus_int sf_length, /* I length of a 5 ms subframe */
602 opus_int nb_subfr, /* I number of subframes */
603 opus_int complexity, /* I Complexity setting */
604 int arch /* I Run-time architecture */
605 )
606 {
607 const opus_int16 *target_ptr;
608 opus_int i, j, k, lag_counter, lag_low, lag_high;
609 opus_int nb_cbk_search, delta, idx, cbk_size;
610 VARDECL( opus_int32, scratch_mem );
611 VARDECL( opus_int32, xcorr32 );
612 const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
613 SAVE_STACK;
614
615 silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
616 silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
617
618 if( nb_subfr == PE_MAX_NB_SUBFR ) {
619 Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
620 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
621 nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
622 cbk_size = PE_NB_CBKS_STAGE3_MAX;
623 } else {
624 silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
625 Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
626 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
627 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
628 cbk_size = PE_NB_CBKS_STAGE3_10MS;
629 }
630 ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
631 ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
632
633 target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
634 for( k = 0; k < nb_subfr; k++ ) {
635 lag_counter = 0;
636
637 /* Calculate the correlations for each subframe */
638 lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 );
639 lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
640 silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
641 celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
642 for( j = lag_low; j <= lag_high; j++ ) {
643 silk_assert( lag_counter < SCRATCH_SIZE );
644 scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
645 lag_counter++;
646 }
647
648 delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
649 for( i = 0; i < nb_cbk_search; i++ ) {
650 /* Fill out the 3 dim array that stores the correlations for */
651 /* each code_book vector for each start lag */
652 idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
653 for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
654 silk_assert( idx + j < SCRATCH_SIZE );
655 silk_assert( idx + j < lag_counter );
656 matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
657 scratch_mem[ idx + j ];
658 }
659 }
660 target_ptr += sf_length;
661 }
662 RESTORE_STACK;
663 }
664
665 /********************************************************************/
666 /* Calculate the energies for first two subframes. The energies are */
667 /* calculated recursively. */
668 /********************************************************************/
silk_P_Ana_calc_energy_st3(silk_pe_stage3_vals energies_st3[],const opus_int16 frame[],opus_int start_lag,opus_int sf_length,opus_int nb_subfr,opus_int complexity,int arch)669 static void silk_P_Ana_calc_energy_st3(
670 silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */
671 const opus_int16 frame[], /* I vector to calc energy in */
672 opus_int start_lag, /* I lag offset to search around */
673 opus_int sf_length, /* I length of one 5 ms subframe */
674 opus_int nb_subfr, /* I number of subframes */
675 opus_int complexity, /* I Complexity setting */
676 int arch /* I Run-time architecture */
677 )
678 {
679 const opus_int16 *target_ptr, *basis_ptr;
680 opus_int32 energy;
681 opus_int k, i, j, lag_counter;
682 opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff;
683 VARDECL( opus_int32, scratch_mem );
684 const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
685 SAVE_STACK;
686
687 silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
688 silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
689
690 if( nb_subfr == PE_MAX_NB_SUBFR ) {
691 Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
692 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
693 nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
694 cbk_size = PE_NB_CBKS_STAGE3_MAX;
695 } else {
696 silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
697 Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
698 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
699 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
700 cbk_size = PE_NB_CBKS_STAGE3_10MS;
701 }
702 ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
703
704 target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
705 for( k = 0; k < nb_subfr; k++ ) {
706 lag_counter = 0;
707
708 /* Calculate the energy for first lag */
709 basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
710 energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch );
711 silk_assert( energy >= 0 );
712 scratch_mem[ lag_counter ] = energy;
713 lag_counter++;
714
715 lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
716 for( i = 1; i < lag_diff; i++ ) {
717 /* remove part outside new window */
718 energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
719 silk_assert( energy >= 0 );
720
721 /* add part that comes into window */
722 energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
723 silk_assert( energy >= 0 );
724 silk_assert( lag_counter < SCRATCH_SIZE );
725 scratch_mem[ lag_counter ] = energy;
726 lag_counter++;
727 }
728
729 delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
730 for( i = 0; i < nb_cbk_search; i++ ) {
731 /* Fill out the 3 dim array that stores the correlations for */
732 /* each code_book vector for each start lag */
733 idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
734 for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
735 silk_assert( idx + j < SCRATCH_SIZE );
736 silk_assert( idx + j < lag_counter );
737 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
738 scratch_mem[ idx + j ];
739 silk_assert(
740 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
741 }
742 }
743 target_ptr += sf_length;
744 }
745 RESTORE_STACK;
746 }
747