1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 /***********************************************************
33 * Pitch analyser function
34 ********************************************************** */
35 #include "SigProc_FIX.h"
36 #include "pitch_est_defines.h"
37 #include "stack_alloc.h"
38 #include "debug.h"
39 #include "pitch.h"
40
41 #define SCRATCH_SIZE 22
42 #define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 )
43 #define SF_LENGTH_8KHZ ( PE_SUBFR_LENGTH_MS * 8 )
44 #define MIN_LAG_4KHZ ( PE_MIN_LAG_MS * 4 )
45 #define MIN_LAG_8KHZ ( PE_MIN_LAG_MS * 8 )
46 #define MAX_LAG_4KHZ ( PE_MAX_LAG_MS * 4 )
47 #define MAX_LAG_8KHZ ( PE_MAX_LAG_MS * 8 - 1 )
48 #define CSTRIDE_4KHZ ( MAX_LAG_4KHZ + 1 - MIN_LAG_4KHZ )
49 #define CSTRIDE_8KHZ ( MAX_LAG_8KHZ + 3 - ( MIN_LAG_8KHZ - 2 ) )
50 #define D_COMP_MIN ( MIN_LAG_8KHZ - 3 )
51 #define D_COMP_MAX ( MAX_LAG_8KHZ + 4 )
52 #define D_COMP_STRIDE ( D_COMP_MAX - D_COMP_MIN )
53
54 typedef opus_int32 silk_pe_stage3_vals[ PE_NB_STAGE3_LAGS ];
55
56 /************************************************************/
57 /* Internally used functions */
58 /************************************************************/
59 static void silk_P_Ana_calc_corr_st3(
60 silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */
61 const opus_int16 frame[], /* I vector to correlate */
62 opus_int start_lag, /* I lag offset to search around */
63 opus_int sf_length, /* I length of a 5 ms subframe */
64 opus_int nb_subfr, /* I number of subframes */
65 opus_int complexity, /* I Complexity setting */
66 int arch /* I Run-time architecture */
67 );
68
69 static void silk_P_Ana_calc_energy_st3(
70 silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */
71 const opus_int16 frame[], /* I vector to calc energy in */
72 opus_int start_lag, /* I lag offset to search around */
73 opus_int sf_length, /* I length of one 5 ms subframe */
74 opus_int nb_subfr, /* I number of subframes */
75 opus_int complexity, /* I Complexity setting */
76 int arch /* I Run-time architecture */
77 );
78
79 /*************************************************************/
80 /* FIXED POINT CORE PITCH ANALYSIS FUNCTION */
81 /*************************************************************/
silk_pitch_analysis_core(const opus_int16 * frame_unscaled,opus_int * pitch_out,opus_int16 * lagIndex,opus_int8 * contourIndex,opus_int * LTPCorr_Q15,opus_int prevLag,const opus_int32 search_thres1_Q16,const opus_int search_thres2_Q13,const opus_int Fs_kHz,const opus_int complexity,const opus_int nb_subfr,int arch)82 opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 unvoiced */
83 const opus_int16 *frame_unscaled, /* I Signal of length PE_FRAME_LENGTH_MS*Fs_kHz */
84 opus_int *pitch_out, /* O 4 pitch lag values */
85 opus_int16 *lagIndex, /* O Lag Index */
86 opus_int8 *contourIndex, /* O Pitch contour Index */
87 opus_int *LTPCorr_Q15, /* I/O Normalized correlation; input: value from previous frame */
88 opus_int prevLag, /* I Last lag of previous frame; set to zero is unvoiced */
89 const opus_int32 search_thres1_Q16, /* I First stage threshold for lag candidates 0 - 1 */
90 const opus_int search_thres2_Q13, /* I Final threshold for lag candidates 0 - 1 */
91 const opus_int Fs_kHz, /* I Sample frequency (kHz) */
92 const opus_int complexity, /* I Complexity setting, 0-2, where 2 is highest */
93 const opus_int nb_subfr, /* I number of 5 ms subframes */
94 int arch /* I Run-time architecture */
95 )
96 {
97 VARDECL( opus_int16, frame_8kHz_buf );
98 VARDECL( opus_int16, frame_4kHz );
99 VARDECL( opus_int16, frame_scaled );
100 opus_int32 filt_state[ 6 ];
101 const opus_int16 *frame, *frame_8kHz;
102 opus_int i, k, d, j;
103 VARDECL( opus_int16, C );
104 VARDECL( opus_int32, xcorr32 );
105 const opus_int16 *target_ptr, *basis_ptr;
106 opus_int32 cross_corr, normalizer, energy, energy_basis, energy_target;
107 opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp, shift;
108 VARDECL( opus_int16, d_comp );
109 opus_int32 sum, threshold, lag_counter;
110 opus_int CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;
111 opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;
112 VARDECL( silk_pe_stage3_vals, energies_st3 );
113 VARDECL( silk_pe_stage3_vals, cross_corr_st3 );
114 opus_int frame_length, frame_length_8kHz, frame_length_4kHz;
115 opus_int sf_length;
116 opus_int min_lag;
117 opus_int max_lag;
118 opus_int32 contour_bias_Q15, diff;
119 opus_int nb_cbk_search, cbk_size;
120 opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
121 const opus_int8 *Lag_CB_ptr;
122 SAVE_STACK;
123
124 /* Check for valid sampling frequency */
125 silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
126
127 /* Check for valid complexity setting */
128 silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
129 silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
130
131 silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );
132 silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );
133
134 /* Set up frame lengths max / min lag for the sampling frequency */
135 frame_length = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;
136 frame_length_4kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 4;
137 frame_length_8kHz = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * 8;
138 sf_length = PE_SUBFR_LENGTH_MS * Fs_kHz;
139 min_lag = PE_MIN_LAG_MS * Fs_kHz;
140 max_lag = PE_MAX_LAG_MS * Fs_kHz - 1;
141
142 /* Downscale input if necessary */
143 silk_sum_sqr_shift( &energy, &shift, frame_unscaled, frame_length );
144 shift += 3 - silk_CLZ32( energy ); /* at least two bits headroom */
145 ALLOC( frame_scaled, frame_length, opus_int16 );
146 if( shift > 0 ) {
147 shift = silk_RSHIFT( shift + 1, 1 );
148 for( i = 0; i < frame_length; i++ ) {
149 frame_scaled[ i ] = silk_RSHIFT( frame_unscaled[ i ], shift );
150 }
151 frame = frame_scaled;
152 } else {
153 frame = frame_unscaled;
154 }
155
156 ALLOC( frame_8kHz_buf, ( Fs_kHz == 8 ) ? 1 : frame_length_8kHz, opus_int16 );
157 /* Resample from input sampled at Fs_kHz to 8 kHz */
158 if( Fs_kHz == 16 ) {
159 silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
160 silk_resampler_down2( filt_state, frame_8kHz_buf, frame, frame_length );
161 frame_8kHz = frame_8kHz_buf;
162 } else if( Fs_kHz == 12 ) {
163 silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
164 silk_resampler_down2_3( filt_state, frame_8kHz_buf, frame, frame_length );
165 frame_8kHz = frame_8kHz_buf;
166 } else {
167 silk_assert( Fs_kHz == 8 );
168 frame_8kHz = frame;
169 }
170
171 /* Decimate again to 4 kHz */
172 silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );/* Set state to zero */
173 ALLOC( frame_4kHz, frame_length_4kHz, opus_int16 );
174 silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
175
176 /* Low-pass filter */
177 for( i = frame_length_4kHz - 1; i > 0; i-- ) {
178 frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
179 }
180
181
182 /******************************************************************************
183 * FIRST STAGE, operating in 4 khz
184 ******************************************************************************/
185 ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
186 ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
187 silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
188 target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
189 for( k = 0; k < nb_subfr >> 1; k++ ) {
190 /* Check that we are within range of the array */
191 silk_assert( target_ptr >= frame_4kHz );
192 silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
193
194 basis_ptr = target_ptr - MIN_LAG_4KHZ;
195
196 /* Check that we are within range of the array */
197 silk_assert( basis_ptr >= frame_4kHz );
198 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
199
200 celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1, arch );
201
202 /* Calculate first vector products before loop */
203 cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
204 normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch );
205 normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch ) );
206 normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
207
208 matrix_ptr( C, k, 0, CSTRIDE_4KHZ ) =
209 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
210
211 /* From now on normalizer is computed recursively */
212 for( d = MIN_LAG_4KHZ + 1; d <= MAX_LAG_4KHZ; d++ ) {
213 basis_ptr--;
214
215 /* Check that we are within range of the array */
216 silk_assert( basis_ptr >= frame_4kHz );
217 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
218
219 cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
220
221 /* Add contribution of new sample and remove contribution from oldest sample */
222 normalizer = silk_ADD32( normalizer,
223 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -
224 silk_SMULBB( basis_ptr[ SF_LENGTH_8KHZ ], basis_ptr[ SF_LENGTH_8KHZ ] ) );
225
226 matrix_ptr( C, k, d - MIN_LAG_4KHZ, CSTRIDE_4KHZ) =
227 (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */
228 }
229 /* Update target pointer */
230 target_ptr += SF_LENGTH_8KHZ;
231 }
232
233 /* Combine two subframes into single correlation measure and apply short-lag bias */
234 if( nb_subfr == PE_MAX_NB_SUBFR ) {
235 for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
236 sum = (opus_int32)matrix_ptr( C, 0, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ )
237 + (opus_int32)matrix_ptr( C, 1, i - MIN_LAG_4KHZ, CSTRIDE_4KHZ ); /* Q14 */
238 sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
239 C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
240 }
241 } else {
242 /* Only short-lag bias */
243 for( i = MAX_LAG_4KHZ; i >= MIN_LAG_4KHZ; i-- ) {
244 sum = silk_LSHIFT( (opus_int32)C[ i - MIN_LAG_4KHZ ], 1 ); /* Q14 */
245 sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) ); /* Q14 */
246 C[ i - MIN_LAG_4KHZ ] = (opus_int16)sum; /* Q14 */
247 }
248 }
249
250 /* Sort */
251 length_d_srch = silk_ADD_LSHIFT32( 4, complexity, 1 );
252 silk_assert( 3 * length_d_srch <= PE_D_SRCH_LENGTH );
253 silk_insertion_sort_decreasing_int16( C, d_srch, CSTRIDE_4KHZ,
254 length_d_srch );
255
256 /* Escape if correlation is very low already here */
257 Cmax = (opus_int)C[ 0 ]; /* Q14 */
258 if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {
259 silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
260 *LTPCorr_Q15 = 0;
261 *lagIndex = 0;
262 *contourIndex = 0;
263 RESTORE_STACK;
264 return 1;
265 }
266
267 threshold = silk_SMULWB( search_thres1_Q16, Cmax );
268 for( i = 0; i < length_d_srch; i++ ) {
269 /* Convert to 8 kHz indices for the sorted correlation that exceeds the threshold */
270 if( C[ i ] > threshold ) {
271 d_srch[ i ] = silk_LSHIFT( d_srch[ i ] + MIN_LAG_4KHZ, 1 );
272 } else {
273 length_d_srch = i;
274 break;
275 }
276 }
277 silk_assert( length_d_srch > 0 );
278
279 ALLOC( d_comp, D_COMP_STRIDE, opus_int16 );
280 for( i = D_COMP_MIN; i < D_COMP_MAX; i++ ) {
281 d_comp[ i - D_COMP_MIN ] = 0;
282 }
283 for( i = 0; i < length_d_srch; i++ ) {
284 d_comp[ d_srch[ i ] - D_COMP_MIN ] = 1;
285 }
286
287 /* Convolution */
288 for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
289 d_comp[ i - D_COMP_MIN ] +=
290 d_comp[ i - 1 - D_COMP_MIN ] + d_comp[ i - 2 - D_COMP_MIN ];
291 }
292
293 length_d_srch = 0;
294 for( i = MIN_LAG_8KHZ; i < MAX_LAG_8KHZ + 1; i++ ) {
295 if( d_comp[ i + 1 - D_COMP_MIN ] > 0 ) {
296 d_srch[ length_d_srch ] = i;
297 length_d_srch++;
298 }
299 }
300
301 /* Convolution */
302 for( i = D_COMP_MAX - 1; i >= MIN_LAG_8KHZ; i-- ) {
303 d_comp[ i - D_COMP_MIN ] += d_comp[ i - 1 - D_COMP_MIN ]
304 + d_comp[ i - 2 - D_COMP_MIN ] + d_comp[ i - 3 - D_COMP_MIN ];
305 }
306
307 length_d_comp = 0;
308 for( i = MIN_LAG_8KHZ; i < D_COMP_MAX; i++ ) {
309 if( d_comp[ i - D_COMP_MIN ] > 0 ) {
310 d_comp[ length_d_comp ] = i - 2;
311 length_d_comp++;
312 }
313 }
314
315 /**********************************************************************************
316 ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
317 *************************************************************************************/
318
319 /*********************************************************************************
320 * Find energy of each subframe projected onto its history, for a range of delays
321 *********************************************************************************/
322 silk_memset( C, 0, nb_subfr * CSTRIDE_8KHZ * sizeof( opus_int16 ) );
323
324 target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];
325 for( k = 0; k < nb_subfr; k++ ) {
326
327 /* Check that we are within range of the array */
328 silk_assert( target_ptr >= frame_8kHz );
329 silk_assert( target_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
330
331 energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ, arch ), 1 );
332 for( j = 0; j < length_d_comp; j++ ) {
333 d = d_comp[ j ];
334 basis_ptr = target_ptr - d;
335
336 /* Check that we are within range of the array */
337 silk_assert( basis_ptr >= frame_8kHz );
338 silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_8kHz + frame_length_8kHz );
339
340 cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
341 if( cross_corr > 0 ) {
342 energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ, arch );
343 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) =
344 (opus_int16)silk_DIV32_varQ( cross_corr,
345 silk_ADD32( energy_target,
346 energy_basis ),
347 13 + 1 ); /* Q13 */
348 } else {
349 matrix_ptr( C, k, d - ( MIN_LAG_8KHZ - 2 ), CSTRIDE_8KHZ ) = 0;
350 }
351 }
352 target_ptr += SF_LENGTH_8KHZ;
353 }
354
355 /* search over lag range and lags codebook */
356 /* scale factor for lag codebook, as a function of center lag */
357
358 CCmax = silk_int32_MIN;
359 CCmax_b = silk_int32_MIN;
360
361 CBimax = 0; /* To avoid returning undefined lag values */
362 lag = -1; /* To check if lag with strong enough correlation has been found */
363
364 if( prevLag > 0 ) {
365 if( Fs_kHz == 12 ) {
366 prevLag = silk_DIV32_16( silk_LSHIFT( prevLag, 1 ), 3 );
367 } else if( Fs_kHz == 16 ) {
368 prevLag = silk_RSHIFT( prevLag, 1 );
369 }
370 prevLag_log2_Q7 = silk_lin2log( (opus_int32)prevLag );
371 } else {
372 prevLag_log2_Q7 = 0;
373 }
374 silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );
375 /* Set up stage 2 codebook based on number of subframes */
376 if( nb_subfr == PE_MAX_NB_SUBFR ) {
377 cbk_size = PE_NB_CBKS_STAGE2_EXT;
378 Lag_CB_ptr = &silk_CB_lags_stage2[ 0 ][ 0 ];
379 if( Fs_kHz == 8 && complexity > SILK_PE_MIN_COMPLEX ) {
380 /* If input is 8 khz use a larger codebook here because it is last stage */
381 nb_cbk_search = PE_NB_CBKS_STAGE2_EXT;
382 } else {
383 nb_cbk_search = PE_NB_CBKS_STAGE2;
384 }
385 } else {
386 cbk_size = PE_NB_CBKS_STAGE2_10MS;
387 Lag_CB_ptr = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];
388 nb_cbk_search = PE_NB_CBKS_STAGE2_10MS;
389 }
390
391 for( k = 0; k < length_d_srch; k++ ) {
392 d = d_srch[ k ];
393 for( j = 0; j < nb_cbk_search; j++ ) {
394 CC[ j ] = 0;
395 for( i = 0; i < nb_subfr; i++ ) {
396 opus_int d_subfr;
397 /* Try all codebooks */
398 d_subfr = d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size );
399 CC[ j ] = CC[ j ]
400 + (opus_int32)matrix_ptr( C, i,
401 d_subfr - ( MIN_LAG_8KHZ - 2 ),
402 CSTRIDE_8KHZ );
403 }
404 }
405 /* Find best codebook */
406 CCmax_new = silk_int32_MIN;
407 CBimax_new = 0;
408 for( i = 0; i < nb_cbk_search; i++ ) {
409 if( CC[ i ] > CCmax_new ) {
410 CCmax_new = CC[ i ];
411 CBimax_new = i;
412 }
413 }
414
415 /* Bias towards shorter lags */
416 lag_log2_Q7 = silk_lin2log( d ); /* Q7 */
417 silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );
418 silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );
419 CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */
420
421 /* Bias towards previous lag */
422 silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );
423 if( prevLag > 0 ) {
424 delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;
425 silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );
426 delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );
427 prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */
428 prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );
429 CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */
430 }
431
432 if( CCmax_new_b > CCmax_b && /* Find maximum biased correlation */
433 CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 ) && /* Correlation needs to be high enough to be voiced */
434 silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= MIN_LAG_8KHZ /* Lag must be in range */
435 ) {
436 CCmax_b = CCmax_new_b;
437 CCmax = CCmax_new;
438 lag = d;
439 CBimax = CBimax_new;
440 }
441 }
442
443 if( lag == -1 ) {
444 /* No suitable candidate found */
445 silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );
446 *LTPCorr_Q15 = 0;
447 *lagIndex = 0;
448 *contourIndex = 0;
449 RESTORE_STACK;
450 return 1;
451 }
452
453 /* Output normalized correlation */
454 *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );
455 silk_assert( *LTPCorr_Q15 >= 0 );
456
457 if( Fs_kHz > 8 ) {
458 /* Search in original signal */
459
460 CBimax_old = CBimax;
461 /* Compensate for decimation */
462 silk_assert( lag == silk_SAT16( lag ) );
463 if( Fs_kHz == 12 ) {
464 lag = silk_RSHIFT( silk_SMULBB( lag, 3 ), 1 );
465 } else if( Fs_kHz == 16 ) {
466 lag = silk_LSHIFT( lag, 1 );
467 } else {
468 lag = silk_SMULBB( lag, 3 );
469 }
470
471 lag = silk_LIMIT_int( lag, min_lag, max_lag );
472 start_lag = silk_max_int( lag - 2, min_lag );
473 end_lag = silk_min_int( lag + 2, max_lag );
474 lag_new = lag; /* to avoid undefined lag */
475 CBimax = 0; /* to avoid undefined lag */
476
477 CCmax = silk_int32_MIN;
478 /* pitch lags according to second stage */
479 for( k = 0; k < nb_subfr; k++ ) {
480 pitch_out[ k ] = lag + 2 * silk_CB_lags_stage2[ k ][ CBimax_old ];
481 }
482
483 /* Set up codebook parameters according to complexity setting and frame length */
484 if( nb_subfr == PE_MAX_NB_SUBFR ) {
485 nb_cbk_search = (opus_int)silk_nb_cbk_searchs_stage3[ complexity ];
486 cbk_size = PE_NB_CBKS_STAGE3_MAX;
487 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
488 } else {
489 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
490 cbk_size = PE_NB_CBKS_STAGE3_10MS;
491 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
492 }
493
494 /* Calculate the correlations and energies needed in stage 3 */
495 ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
496 ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
497 silk_P_Ana_calc_corr_st3( cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
498 silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
499
500 lag_counter = 0;
501 silk_assert( lag == silk_SAT16( lag ) );
502 contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
503
504 target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
505 energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 );
506 for( d = start_lag; d <= end_lag; d++ ) {
507 for( j = 0; j < nb_cbk_search; j++ ) {
508 cross_corr = 0;
509 energy = energy_target;
510 for( k = 0; k < nb_subfr; k++ ) {
511 cross_corr = silk_ADD32( cross_corr,
512 matrix_ptr( cross_corr_st3, k, j,
513 nb_cbk_search )[ lag_counter ] );
514 energy = silk_ADD32( energy,
515 matrix_ptr( energies_st3, k, j,
516 nb_cbk_search )[ lag_counter ] );
517 silk_assert( energy >= 0 );
518 }
519 if( cross_corr > 0 ) {
520 CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 ); /* Q13 */
521 /* Reduce depending on flatness of contour */
522 diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j ); /* Q15 */
523 silk_assert( diff == silk_SAT16( diff ) );
524 CCmax_new = silk_SMULWB( CCmax_new, diff ); /* Q14 */
525 } else {
526 CCmax_new = 0;
527 }
528
529 if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {
530 CCmax = CCmax_new;
531 lag_new = d;
532 CBimax = j;
533 }
534 }
535 lag_counter++;
536 }
537
538 for( k = 0; k < nb_subfr; k++ ) {
539 pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
540 pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, PE_MAX_LAG_MS * Fs_kHz );
541 }
542 *lagIndex = (opus_int16)( lag_new - min_lag);
543 *contourIndex = (opus_int8)CBimax;
544 } else { /* Fs_kHz == 8 */
545 /* Save Lags */
546 for( k = 0; k < nb_subfr; k++ ) {
547 pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
548 pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], MIN_LAG_8KHZ, PE_MAX_LAG_MS * 8 );
549 }
550 *lagIndex = (opus_int16)( lag - MIN_LAG_8KHZ );
551 *contourIndex = (opus_int8)CBimax;
552 }
553 silk_assert( *lagIndex >= 0 );
554 /* return as voiced */
555 RESTORE_STACK;
556 return 0;
557 }
558
559 /***********************************************************************
560 * Calculates the correlations used in stage 3 search. In order to cover
561 * the whole lag codebook for all the searched offset lags (lag +- 2),
562 * the following correlations are needed in each sub frame:
563 *
564 * sf1: lag range [-8,...,7] total 16 correlations
565 * sf2: lag range [-4,...,4] total 9 correlations
566 * sf3: lag range [-3,....4] total 8 correltions
567 * sf4: lag range [-6,....8] total 15 correlations
568 *
569 * In total 48 correlations. The direct implementation computed in worst
570 * case 4*12*5 = 240 correlations, but more likely around 120.
571 ***********************************************************************/
silk_P_Ana_calc_corr_st3(silk_pe_stage3_vals cross_corr_st3[],const opus_int16 frame[],opus_int start_lag,opus_int sf_length,opus_int nb_subfr,opus_int complexity,int arch)572 static void silk_P_Ana_calc_corr_st3(
573 silk_pe_stage3_vals cross_corr_st3[], /* O 3 DIM correlation array */
574 const opus_int16 frame[], /* I vector to correlate */
575 opus_int start_lag, /* I lag offset to search around */
576 opus_int sf_length, /* I length of a 5 ms subframe */
577 opus_int nb_subfr, /* I number of subframes */
578 opus_int complexity, /* I Complexity setting */
579 int arch /* I Run-time architecture */
580 )
581 {
582 const opus_int16 *target_ptr;
583 opus_int i, j, k, lag_counter, lag_low, lag_high;
584 opus_int nb_cbk_search, delta, idx, cbk_size;
585 VARDECL( opus_int32, scratch_mem );
586 VARDECL( opus_int32, xcorr32 );
587 const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
588 SAVE_STACK;
589
590 silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
591 silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
592
593 if( nb_subfr == PE_MAX_NB_SUBFR ) {
594 Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
595 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
596 nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
597 cbk_size = PE_NB_CBKS_STAGE3_MAX;
598 } else {
599 silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
600 Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
601 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
602 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
603 cbk_size = PE_NB_CBKS_STAGE3_10MS;
604 }
605 ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
606 ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
607
608 target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
609 for( k = 0; k < nb_subfr; k++ ) {
610 lag_counter = 0;
611
612 /* Calculate the correlations for each subframe */
613 lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 );
614 lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
615 silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
616 celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1, arch );
617 for( j = lag_low; j <= lag_high; j++ ) {
618 silk_assert( lag_counter < SCRATCH_SIZE );
619 scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
620 lag_counter++;
621 }
622
623 delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
624 for( i = 0; i < nb_cbk_search; i++ ) {
625 /* Fill out the 3 dim array that stores the correlations for */
626 /* each code_book vector for each start lag */
627 idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
628 for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
629 silk_assert( idx + j < SCRATCH_SIZE );
630 silk_assert( idx + j < lag_counter );
631 matrix_ptr( cross_corr_st3, k, i, nb_cbk_search )[ j ] =
632 scratch_mem[ idx + j ];
633 }
634 }
635 target_ptr += sf_length;
636 }
637 RESTORE_STACK;
638 }
639
640 /********************************************************************/
641 /* Calculate the energies for first two subframes. The energies are */
642 /* calculated recursively. */
643 /********************************************************************/
silk_P_Ana_calc_energy_st3(silk_pe_stage3_vals energies_st3[],const opus_int16 frame[],opus_int start_lag,opus_int sf_length,opus_int nb_subfr,opus_int complexity,int arch)644 static void silk_P_Ana_calc_energy_st3(
645 silk_pe_stage3_vals energies_st3[], /* O 3 DIM energy array */
646 const opus_int16 frame[], /* I vector to calc energy in */
647 opus_int start_lag, /* I lag offset to search around */
648 opus_int sf_length, /* I length of one 5 ms subframe */
649 opus_int nb_subfr, /* I number of subframes */
650 opus_int complexity, /* I Complexity setting */
651 int arch /* I Run-time architecture */
652 )
653 {
654 const opus_int16 *target_ptr, *basis_ptr;
655 opus_int32 energy;
656 opus_int k, i, j, lag_counter;
657 opus_int nb_cbk_search, delta, idx, cbk_size, lag_diff;
658 VARDECL( opus_int32, scratch_mem );
659 const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
660 SAVE_STACK;
661
662 silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
663 silk_assert( complexity <= SILK_PE_MAX_COMPLEX );
664
665 if( nb_subfr == PE_MAX_NB_SUBFR ) {
666 Lag_range_ptr = &silk_Lag_range_stage3[ complexity ][ 0 ][ 0 ];
667 Lag_CB_ptr = &silk_CB_lags_stage3[ 0 ][ 0 ];
668 nb_cbk_search = silk_nb_cbk_searchs_stage3[ complexity ];
669 cbk_size = PE_NB_CBKS_STAGE3_MAX;
670 } else {
671 silk_assert( nb_subfr == PE_MAX_NB_SUBFR >> 1);
672 Lag_range_ptr = &silk_Lag_range_stage3_10_ms[ 0 ][ 0 ];
673 Lag_CB_ptr = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];
674 nb_cbk_search = PE_NB_CBKS_STAGE3_10MS;
675 cbk_size = PE_NB_CBKS_STAGE3_10MS;
676 }
677 ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
678
679 target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ];
680 for( k = 0; k < nb_subfr; k++ ) {
681 lag_counter = 0;
682
683 /* Calculate the energy for first lag */
684 basis_ptr = target_ptr - ( start_lag + matrix_ptr( Lag_range_ptr, k, 0, 2 ) );
685 energy = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length, arch );
686 silk_assert( energy >= 0 );
687 scratch_mem[ lag_counter ] = energy;
688 lag_counter++;
689
690 lag_diff = ( matrix_ptr( Lag_range_ptr, k, 1, 2 ) - matrix_ptr( Lag_range_ptr, k, 0, 2 ) + 1 );
691 for( i = 1; i < lag_diff; i++ ) {
692 /* remove part outside new window */
693 energy -= silk_SMULBB( basis_ptr[ sf_length - i ], basis_ptr[ sf_length - i ] );
694 silk_assert( energy >= 0 );
695
696 /* add part that comes into window */
697 energy = silk_ADD_SAT32( energy, silk_SMULBB( basis_ptr[ -i ], basis_ptr[ -i ] ) );
698 silk_assert( energy >= 0 );
699 silk_assert( lag_counter < SCRATCH_SIZE );
700 scratch_mem[ lag_counter ] = energy;
701 lag_counter++;
702 }
703
704 delta = matrix_ptr( Lag_range_ptr, k, 0, 2 );
705 for( i = 0; i < nb_cbk_search; i++ ) {
706 /* Fill out the 3 dim array that stores the correlations for */
707 /* each code_book vector for each start lag */
708 idx = matrix_ptr( Lag_CB_ptr, k, i, cbk_size ) - delta;
709 for( j = 0; j < PE_NB_STAGE3_LAGS; j++ ) {
710 silk_assert( idx + j < SCRATCH_SIZE );
711 silk_assert( idx + j < lag_counter );
712 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] =
713 scratch_mem[ idx + j ];
714 silk_assert(
715 matrix_ptr( energies_st3, k, i, nb_cbk_search )[ j ] >= 0 );
716 }
717 }
718 target_ptr += sf_length;
719 }
720 RESTORE_STACK;
721 }
722