1 /*
2 ** Copyright 2003-2010, VisualOn, Inc.
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 ** http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16
17 /***********************************************************************
18 * File: wb_vad.c *
19 * *
20 * Description: Voice Activity Detection *
21 * *
22 ************************************************************************/
23
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include "cnst.h"
27 #include "wb_vad.h"
28 #include "typedef.h"
29 #include "basic_op.h"
30 #include "math_op.h"
31 #include "wb_vad_c.h"
32 #include "mem_align.h"
33
34 /******************************************************************************
35 * Calculate Log2 and scale the signal:
36 *
37 * ilog2(Word32 in) = -1024*log10(in * 2^-31)/log10(2), where in = [1, 2^31-1]
38 *
39 * input output
40 * 32768 16384
41 * 1 31744
42 *
43 * When input is in the range of [1,2^16], max error is 0.0380%.
44 *********************************************************************************/
45
/*
 * Convert a 16-bit value into a scaled logarithmic measure.
 * Inputs that are zero or negative are handled as if they were 1.
 * The result is built from the two normalization shift counts and the
 * upper bits of the repeatedly squared, normalized input.
 */
static Word16 ilog2(                       /* return: output value of the log2 */
    Word16 mant                            /* i: value to be converted */
)
{
    Word16 coarse_shift, fine_shift, result;
    Word32 rounds, squared;

    /* clamp non-positive input to the smallest positive value */
    mant = (mant <= 0) ? 1 : mant;

    /* normalize the input and remember how far it had to be shifted */
    coarse_shift = norm_s(mant);
    mant = mant << coarse_shift;

    /* square three times in 16 bits, then once more with a 32-bit result */
    rounds = 3;
    while (rounds-- > 0)
    {
        mant = vo_mult(mant, mant);
    }
    squared = vo_L_mult(mant, mant);

    /* normalize the 32-bit square and keep its upper half */
    fine_shift = norm_l(squared);
    mant = extract_h(squared << fine_shift);

    /* combine: coarse shift in steps of 1024, fine shift in steps of 64 */
    result = (coarse_shift + 16) << 10;
    result = add1(result, (fine_shift << 6));
    result = vo_sub(add1(result, 127), (mant >> 8));
    return result;
}
72
73 /******************************************************************************
74 *
75 * Function : filter5
76 * Purpose : Fifth-order half-band lowpass/highpass filter pair with
77 * decimation.
78 *
79 *******************************************************************************/
80
/*
 * filter5: run one sample pair through two recursive sections (one per
 * input, using COEFF5_1 / COEFF5_2 and one memory word each) and replace
 * the pair with the scaled sum (*in0) and scaled difference (*in1) of the
 * two section outputs.
 */
static void filter5(
    Word16 * in0,                          /* i/o : input values; output low-pass part  */
    Word16 * in1,                          /* i/o : input values; output high-pass part */
    Word16 data[]                          /* i/o : filter memory (two words)           */
)
{
    Word16 temp0, temp1, temp2;

    /* first branch: updates data[0] */
    temp0 = vo_sub(*in0, vo_mult(COEFF5_1, data[0]));
    temp1 = add1(data[0], vo_mult(COEFF5_1, temp0));
    data[0] = temp0;

    /* second branch: updates data[1] */
    temp0 = vo_sub(*in1, vo_mult(COEFF5_2, data[1]));
    temp2 = add1(data[1], vo_mult(COEFF5_2, temp0));
    data[1] = temp0;

    /*
     * Scale by 2^15 with a multiplication rather than "<< 15": the sum and
     * the difference may be negative, and left-shifting a negative signed
     * value is undefined behaviour in C. With 16-bit operands the product
     * always fits in 32 bits, so the numeric result is unchanged.
     */
    *in0 = extract_h(((vo_L_add(temp1, temp2)) * 32768));
    *in1 = extract_h(((vo_L_sub(temp1, temp2)) * 32768));
}
100
101 /******************************************************************************
102 *
103 * Function : filter3
104 * Purpose : Third-order half-band lowpass/highpass filter pair with
105 * decimation.
106 *
107 *******************************************************************************/
108
/*
 * filter3: run *in1 through one recursive section (COEFF3, one memory
 * word) and replace the pair with the scaled sum (*in0) and scaled
 * difference (*in1) of *in0 and the section output.
 */
static void filter3(
    Word16 * in0,                          /* i/o : input values; output low-pass part  */
    Word16 * in1,                          /* i/o : input values; output high-pass part */
    Word16 * data                          /* i/o : filter memory (one word)            */
)
{
    Word16 temp1, temp2;

    temp1 = vo_sub(*in1, vo_mult(COEFF3, *data));
    temp2 = add1(*data, vo_mult(COEFF3, temp1));
    *data = temp1;

    /*
     * Scale by 2^15 with a multiplication rather than "<< 15": the
     * difference/sum may be negative, and left-shifting a negative signed
     * value is undefined behaviour in C. With 16-bit operands the product
     * always fits in 32 bits, so the numeric result is unchanged.
     * *in1 must be written first because it still needs the old *in0.
     */
    *in1 = extract_h(((vo_L_sub(*in0, temp2)) * 32768));
    *in0 = extract_h(((vo_L_add(*in0, temp2)) * 32768));
}
124
125 /******************************************************************************
126 *
127 * Function : level_calculation
128 * Purpose : Calculate signal level in a sub-band. Level is calculated
129 * by summing absolute values of the input data.
130 *
 *              The signal level calculated from the end of the frame
 *              (samples count1 .. count2-1) is stored in (*sub_level)
 *              and added to the level of the next frame.
134 *
135 ******************************************************************************/
136
/*
 * level_calculation: sum of doubled absolute sample values of one sub-band.
 * Samples are read at data[ind_m * n + ind_a]. The sum over n in
 * [count1, count2) is saved (scaled) in *sub_level for the next call; the
 * returned level covers the previously saved *sub_level plus all samples
 * n in [0, count2) of this call.
 */
static Word16 level_calculation(           /* return: signal level */
    Word16 data[],                         /* i   : signal buffer                                  */
    Word16 * sub_level,                    /* i   : level calculated at the end of the previous    */
                                           /*       frame                                          */
                                           /* o   : level of the samples count1 .. count2-1        */
    Word16 count1,                         /* i   : index of the first sample of the trailing part */
    Word16 count2,                         /* i   : number of samples to be counted                */
    Word16 ind_m,                          /* i   : step size for the index of the data buffer     */
    Word16 ind_a,                          /* i   : starting index of the data buffer              */
    Word16 scale                           /* i   : scaling for the level calculation              */
)
{
    Word32 n, tail_sum, frame_sum;

    /* trailing part of the frame: samples count1 .. count2-1 */
    tail_sum = 0L;
    n = count1;
    while (n < count2)
    {
        tail_sum += (abs_s(data[ind_m * n + ind_a])<<1);
        n++;
    }

    /* start from the trailing part plus what the previous call stored */
    frame_sum = vo_L_add(tail_sum, L_shl(*sub_level, 16 - scale));

    /* hand the trailing part over to the next call */
    *sub_level = extract_h(L_shl(tail_sum, scale));

    /* leading part of the frame: samples 0 .. count1-1 */
    n = 0;
    while (n < count1)
    {
        frame_sum += (abs_s(data[ind_m * n + ind_a])<<1);
        n++;
    }

    return extract_h(L_shl2(frame_sum, scale));
}
169
170 /******************************************************************************
171 *
172 * Function : filter_bank
173 * Purpose : Divide input signal into bands and calculate level of
174 * the signal in each band
175 *
176 *******************************************************************************/
177
/*
 * filter_bank: split one input frame into 12 sub-bands and compute the
 * signal level of each band.
 * The frame is halved into a scratch buffer, passed in place through five
 * stages of filter5()/filter3() (each stage working on a coarser sample
 * grid and using its own filter memory in st), and finally each band's
 * interleaved samples are fed to level_calculation().
 */
static void filter_bank(
    VadVars * st,                          /* i/o : State struct               */
    Word16 in[],                           /* i   : input frame                */
    Word16 level[]                         /* o   : signal levels at each band */
)
{
    /* per band: { count1, count2, index step, index offset, scale } */
    static const Word16 band_cfg[12][5] =
    {
        { 2,  8, 32,  0, 17},              /*    0 -  200 Hz */
        { 2,  8, 32, 16, 17},              /*  200 -  400 Hz */
        { 2,  8, 32, 24, 17},              /*  400 -  600 Hz */
        { 2,  8, 32,  8, 17},              /*  600 -  800 Hz */
        { 4, 16, 16, 12, 16},              /*  800 - 1200 Hz */
        { 4, 16, 16,  4, 16},              /* 1200 - 1600 Hz */
        { 4, 16, 16,  6, 16},              /* 1600 - 2000 Hz */
        { 4, 16, 16, 14, 16},              /* 2000 - 2400 Hz */
        { 8, 32,  8,  2, 15},              /* 2400 - 3200 Hz */
        { 8, 32,  8,  3, 15},              /* 3200 - 4000 Hz */
        { 8, 32,  8,  7, 15},              /* 4000 - 4800 Hz */
        {16, 64,  4,  1, 14}               /* 4800 - 6400 Hz */
    };
    Word32 k;
    Word16 work[FRAME_LEN];
    Word16 *p;

    /* shift input 1 bit down for safe scaling */
    for (k = 0; k < FRAME_LEN; k++)
    {
        work[k] = in[k] >> 1;
    }

    /* stage 1: adjacent sample pairs */
    for (k = 0; k < 128; k++)
    {
        p = &work[2 * k];
        filter5(p, p + 1, st->a_data5[0]);
    }

    /* stage 2: groups of 4 samples */
    for (k = 0; k < 64; k++)
    {
        p = &work[4 * k];
        filter5(p, p + 2, st->a_data5[1]);
        filter5(p + 1, p + 3, st->a_data5[2]);
    }

    /* stage 3: groups of 8 samples */
    for (k = 0; k < 32; k++)
    {
        p = &work[8 * k];
        filter5(p, p + 4, st->a_data5[3]);
        filter5(p + 2, p + 6, st->a_data5[4]);
        filter3(p + 3, p + 7, &st->a_data3[0]);
    }

    /* stage 4: groups of 16 samples */
    for (k = 0; k < 16; k++)
    {
        p = &work[16 * k];
        filter3(p, p + 8, &st->a_data3[1]);
        filter3(p + 4, p + 12, &st->a_data3[2]);
        filter3(p + 6, p + 14, &st->a_data3[3]);
    }

    /* stage 5: groups of 32 samples */
    for (k = 0; k < 8; k++)
    {
        p = &work[32 * k];
        filter3(p, p + 16, &st->a_data3[4]);
        filter3(p + 8, p + 24, &st->a_data3[5]);
    }

    /* calculate levels in each frequency band, highest band first */
    for (k = 11; k >= 0; k--)
    {
        level[k] = level_calculation(work, &st->sub_level[k],
                                     band_cfg[k][0], band_cfg[k][1],
                                     band_cfg[k][2], band_cfg[k][3],
                                     band_cfg[k][4]);
    }
}
249
250 /******************************************************************************
251 *
252 * Function : update_cntrl
253 * Purpose : Control update of the background noise estimate.
254 *
255 *******************************************************************************/
256
/*
 * update_cntrl: maintain st->stat_count and st->ave_level[].
 * stat_count is reloaded with STAT_COUNT when the tone flags are set, when
 * the recent VAD decisions were all "0", or when the accumulated ratio
 * between current and averaged band levels exceeds STAT_THR; otherwise it
 * counts down towards zero while the current VAD decision bit is set.
 * Afterwards the averaged band levels are moved towards the current levels
 * with a step size that depends on stat_count and the VAD decision bit.
 */
static void update_cntrl(
    VadVars * st,                          /* i/o : State structure                    */
    Word16 level[]                         /* i   : sub-band levels of the input frame */
)
{
    Word32 band;
    Word16 num, denom, exp, quot, stat_rat;
    Word16 alpha;

    /* tone detected for a while (flag bits 10..14 all set), or the 8 last
     * vad-decisions were "0": reinitialize stat_count */
    if ((sub((Word16) (st->tone_flag & 0x7c00), 0x7c00) == 0) ||
        ((st->vadreg & 0x7f80) == 0))
    {
        st->stat_count = STAT_COUNT;
    }
    else
    {
        stat_rat = 0;
        for (band = 0; band < COMPLEN; band++)
        {
            /* numerator is the larger, denominator the smaller value */
            if (level[band] > st->ave_level[band])
            {
                num = level[band];
                denom = st->ave_level[band];
            }
            else
            {
                num = st->ave_level[band];
                denom = level[band];
            }

            /* limit minimum value of num and denom to STAT_THR_LEVEL */
            if (num < STAT_THR_LEVEL)
            {
                num = STAT_THR_LEVEL;
            }
            if (denom < STAT_THR_LEVEL)
            {
                denom = STAT_THR_LEVEL;
            }

            exp = norm_s(denom);
            denom = denom << exp;

            /* stat_rat = num/denom * 64 */
            quot = div_s(num >> 1, denom);
            stat_rat = add1(stat_rat, shr(quot, (8 - exp)));
        }

        /* compare stat_rat with a threshold and update stat_count */
        if (stat_rat > STAT_THR)
        {
            st->stat_count = STAT_COUNT;
        }
        else if (((st->vadreg & 0x4000) != 0) && (st->stat_count != 0))
        {
            st->stat_count = st->stat_count - 1;
        }
    }

    /* Update average amplitude estimate for stationarity estimation */
    if (st->stat_count == STAT_COUNT)
    {
        alpha = 32767;
    }
    else if ((st->vadreg & 0x4000) == 0)
    {
        alpha = ALPHA5;
    }
    else
    {
        alpha = ALPHA4;
    }

    for (band = 0; band < COMPLEN; band++)
    {
        st->ave_level[band] = add1(st->ave_level[band], vo_mult_r(alpha, vo_sub(level[band], st->ave_level[band])));
    }
}
339
340 /******************************************************************************
341 *
342 * Function : hangover_addition
343 * Purpose : Add hangover after speech bursts
344 *
345 *******************************************************************************/
346
/*
 * hangover_addition: turn the intermediate VAD decision (bit 0x4000 of
 * st->vadreg) into the final flag by keeping the flag raised for up to
 * hang_len extra frames after a burst of at least burst_len active frames.
 * A low-power frame clears both counters and forces the flag to 0.
 */
static Word16 hangover_addition(           /* return: VAD_flag indicating final VAD decision */
    VadVars * st,                          /* i/o : State structure                          */
    Word16 low_power,                      /* i   : flag power of the input frame            */
    Word16 hang_len,                       /* i   : hangover length                          */
    Word16 burst_len                       /* i   : minimum burst length for hangover addition */
)
{
    Word16 vad_flag = 0;

    if (low_power != 0)
    {
        /* input power below threshold: clear counters, VAD_flag stays "0" */
        st->burst_count = 0;
        st->hang_count = 0;
    }
    else if ((st->vadreg & 0x4000) != 0)
    {
        /* active frame: extend the burst and arm the hangover when long enough */
        st->burst_count += 1;
        if (st->burst_count >= burst_len)
        {
            st->hang_count = hang_len;
        }
        vad_flag = 1;
    }
    else
    {
        /* inactive frame: burst ends, consume one hangover frame if any left */
        st->burst_count = 0;
        if (st->hang_count > 0)
        {
            st->hang_count -= 1;
            vad_flag = 1;
        }
    }

    return vad_flag;
}
381
382 /******************************************************************************
383 *
384 * Function : noise_estimate_update
385 * Purpose : Update of background noise estimate
386 *
387 *******************************************************************************/
388
/*
 * noise_estimate_update: adapt the per-band background noise estimate
 * st->bckr_est[] towards the levels of the PREVIOUS frame (st->old_level[])
 * and then store the current levels as old_level for the next call.
 * update_cntrl() is run first because the adaptation speed depends on
 * st->stat_count.
 */
static void noise_estimate_update(
    VadVars * st,                          /* i/o : State structure                    */
    Word16 level[]                         /* i   : sub-band levels of the input frame */
)
{
    Word32 band;
    Word16 alpha_up, alpha_down, bckr_add;
    Word16 diff;

    /* Control update of bckr_est[] */
    update_cntrl(st, level);

    /* Choose update speed */
    bckr_add = 2;
    if ((0x7800 & st->vadreg) == 0)
    {
        alpha_up = ALPHA_UP1;
        alpha_down = ALPHA_DOWN1;
    }
    else if (st->stat_count == 0)
    {
        alpha_up = ALPHA_UP2;
        alpha_down = ALPHA_DOWN2;
    }
    else
    {
        /* no upward adaptation at all in this case */
        alpha_up = 0;
        alpha_down = ALPHA3;
        bckr_add = 0;
    }

    /* Update noise estimate (bckr_est) */
    for (band = 0; band < COMPLEN; band++)
    {
        diff = (st->old_level[band] - st->bckr_est[band]);

        if (diff < 0)
        {
            /* update downwards, never below NOISE_MIN */
            st->bckr_est[band] = add1(-2, add(st->bckr_est[band],vo_mult_r(alpha_down, diff)));
            if (st->bckr_est[band] < NOISE_MIN)
            {
                st->bckr_est[band] = NOISE_MIN;
            }
        }
        else
        {
            /* update upwards, never above NOISE_MAX */
            st->bckr_est[band] = add1(bckr_add, add1(st->bckr_est[band],vo_mult_r(alpha_up, diff)));
            if (st->bckr_est[band] > NOISE_MAX)
            {
                st->bckr_est[band] = NOISE_MAX;
            }
        }
    }

    /* Update signal levels of the previous frame (old_level) */
    for (band = 0; band < COMPLEN; band++)
    {
        st->old_level[band] = level[band];
    }
}
451
452 /******************************************************************************
453 *
454 * Function : vad_decision
455 * Purpose : Calculates VAD_flag
456 *
457 *******************************************************************************/
458
/*
 * vad_decision: compute the VAD flag for one frame.
 * Steps: accumulate the squared level/noise ratio over all bands, derive
 * an adaptive threshold from the noise and speech level estimates, shift
 * the new intermediate decision into st->vadreg, update the background
 * noise estimate, and let hangover_addition() produce the final flag.
 */
static Word16 vad_decision(                /* return value : VAD_flag */
    VadVars * st,                          /* i/o : State structure                    */
    Word16 level[COMPLEN],                 /* i   : sub-band levels of the input frame */
    Word32 pow_sum                         /* i   : power of the input frame           */
)
{
    Word32 band;
    Word32 snr_acc;
    Word32 noise_acc;
    Word16 noise_level, min_speech;
    Word16 log_speech, log_noise;
    Word16 noise_part, speech_part, threshold;
    Word16 low_power;
    Word16 hang_len, burst_len;

    /* Squared sum of the input levels (level) divided by the background
     * noise components (bckr_est). */
    snr_acc = 0;
    for (band = 0; band < COMPLEN; band++)
    {
        Word16 exp, ratio;

        exp = norm_s(st->bckr_est[band]);
        ratio = (st->bckr_est[band] << exp);
        ratio = div_s((level[band] >> 1), ratio);
        ratio = shl(ratio, (exp - (UNIRSHFT - 1)));
        snr_acc = L_mac(snr_acc, ratio, ratio);
    }

    /* Average level of the estimated background noise; the lowest band is
     * ignored. */
    noise_acc = 0;
    for (band = 1; band < COMPLEN; band++)
    {
        noise_acc = vo_L_add(noise_acc, st->bckr_est[band]);
    }
    noise_level = extract_h((noise_acc << 12));

    /* If the speech level is below MIN_SPEECH_SNR times the noise level,
     * raise speech_level to that bound. */
    min_speech = vo_mult(noise_level, MIN_SPEECH_SNR) << 3;
    if (st->speech_level < min_speech)
    {
        st->speech_level = min_speech;
    }
    log_noise = ilog2(noise_level);

    /* If SNR is very poor, speech_level is probably corrupted by the noise
     * level. This is corrected by subtracting MIN_SPEECH_SNR*noise_level
     * from the speech level. */
    log_speech = ilog2(st->speech_level - min_speech);

    /* noise-dependent part of the threshold */
    noise_part = add1(vo_mult(NO_SLOPE, (log_noise - NO_P1)), THR_HIGH);

    /* speech-dependent part, limited to [SP_CH_MIN, SP_CH_MAX] */
    speech_part = add1(SP_CH_MIN, vo_mult(SP_SLOPE, (log_speech - SP_P1)));
    if (speech_part < SP_CH_MIN)
    {
        speech_part = SP_CH_MIN;
    }
    if (speech_part > SP_CH_MAX)
    {
        speech_part = SP_CH_MAX;
    }

    threshold = noise_part + speech_part;
    if (threshold < THR_MIN)
    {
        threshold = THR_MIN;
    }

    /* Shift VAD decision register, then record the intermediate decision */
    st->vadreg = (st->vadreg >> 1);
    if (snr_acc > vo_L_mult(threshold, (512 * COMPLEN)))
    {
        st->vadreg = (Word16) (st->vadreg | 0x4000);
    }

    /* check if the input power (pow_sum) is lower than a threshold */
    low_power = (pow_sum < VAD_POW_LOW) ? 1 : 0;

    /* Update background noise estimates */
    noise_estimate_update(st, level);

    /* Calculate values for hang_len and burst_len based on the threshold */
    hang_len = add1(vo_mult(HANG_SLOPE, (threshold - HANG_P1)), HANG_HIGH);
    if (hang_len < HANG_LOW)
    {
        hang_len = HANG_LOW;
    }
    burst_len = add1(vo_mult(BURST_SLOPE, (threshold - BURST_P1)), BURST_HIGH);

    return hangover_addition(st, low_power, hang_len, burst_len);
}
555
556 /******************************************************************************
557 *
558 * Function : Estimate_Speech()
559 * Purpose : Estimate speech level
560 *
561 * Maximum signal level is searched and stored to the variable sp_max.
562 * The speech frames must locate within SP_EST_COUNT number of frames.
563 * Thus, noisy frames having occasional VAD = "1" decisions will not
564 * affect to the estimated speech_level.
565 *
566 *******************************************************************************/
567
/*
 * Estimate_Speech: track the speech level in st->speech_level.
 * Frames that look like speech (VAD decision bit set or level above the
 * current estimate, and level above MIN_SPEECH_LEVEL1) update the running
 * maximum sp_max. Once SP_ACTIVITY_COUNT such frames were collected, half
 * of the maximum is smoothed into speech_level and the counters restart.
 */
static void Estimate_Speech(
    VadVars * st,                          /* i/o : State structure        */
    Word16 in_level                        /* i   : level of the input frame */
)
{
    Word16 alpha, avg_level;

    /* if the required activity count cannot be achieved, reset counters */
    if ((st->sp_est_cnt - st->sp_max_cnt) > (SP_EST_COUNT - SP_ACTIVITY_COUNT))
    {
        st->sp_est_cnt = 0;
        st->sp_max = 0;
        st->sp_max_cnt = 0;
    }
    st->sp_est_cnt += 1;

    /* frames at or below the minimum level never count as speech */
    if (in_level <= MIN_SPEECH_LEVEL1)
    {
        return;
    }
    /* neither flagged by the VAD nor louder than the current estimate */
    if (!(st->vadreg & 0x4000) && (in_level <= st->speech_level))
    {
        return;
    }

    /* update sp_max */
    if (in_level > st->sp_max)
    {
        st->sp_max = in_level;
    }
    st->sp_max_cnt += 1;

    if (st->sp_max_cnt < SP_ACTIVITY_COUNT)
    {
        return;
    }

    /* scale to get "average" speech level */
    avg_level = (st->sp_max >> 1);

    /* select update speed */
    alpha = (avg_level > st->speech_level) ? ALPHA_SP_UP : ALPHA_SP_DOWN;

    /* update speech estimate */
    if (avg_level > MIN_SPEECH_LEVEL2)
    {
        st->speech_level = add1(st->speech_level, vo_mult_r(alpha, vo_sub(avg_level, st->speech_level)));
    }

    /* clear all counters used for speech estimation */
    st->sp_max = 0;
    st->sp_max_cnt = 0;
    st->sp_est_cnt = 0;
}
618
619 /******************************************************************************
620 *
621 * Function: wb_vad_init
622 * Purpose: Allocates state memory and initializes state memory
623 *
624 *******************************************************************************/
625
wb_vad_init(VadVars ** state,VO_MEM_OPERATOR * pMemOP)626 Word16 wb_vad_init( /* return: non-zero with error, zero for ok. */
627 VadVars ** state, /* i/o : State structure */
628 VO_MEM_OPERATOR *pMemOP
629 )
630 {
631 VadVars *s;
632
633 if (state == (VadVars **) NULL)
634 {
635 fprintf(stderr, "vad_init: invalid parameter\n");
636 return -1;
637 }
638 *state = NULL;
639
640 /* allocate memory */
641 if ((s = (VadVars *) mem_malloc(pMemOP, sizeof(VadVars), 32, VO_INDEX_ENC_AMRWB)) == NULL)
642 {
643 fprintf(stderr, "vad_init: can not malloc state structure\n");
644 return -1;
645 }
646 wb_vad_reset(s);
647
648 *state = s;
649
650 return 0;
651 }
652
653 /******************************************************************************
654 *
655 * Function: wb_vad_reset
656 * Purpose: Initializes state memory
657 *
658 *******************************************************************************/
659
wb_vad_reset(VadVars * state)660 Word16 wb_vad_reset( /* return: non-zero with error, zero for ok. */
661 VadVars * state /* i/o : State structure */
662 )
663 {
664 Word32 i, j;
665
666 if (state == (VadVars *) NULL)
667 {
668 fprintf(stderr, "vad_reset: invalid parameter\n");
669 return -1;
670 }
671 state->tone_flag = 0;
672 state->vadreg = 0;
673 state->hang_count = 0;
674 state->burst_count = 0;
675 state->hang_count = 0;
676
677 /* initialize memory used by the filter bank */
678 for (i = 0; i < F_5TH_CNT; i++)
679 {
680 for (j = 0; j < 2; j++)
681 {
682 state->a_data5[i][j] = 0;
683 }
684 }
685
686 for (i = 0; i < F_3TH_CNT; i++)
687 {
688 state->a_data3[i] = 0;
689 }
690
691 /* initialize the rest of the memory */
692 for (i = 0; i < COMPLEN; i++)
693 {
694 state->bckr_est[i] = NOISE_INIT;
695 state->old_level[i] = NOISE_INIT;
696 state->ave_level[i] = NOISE_INIT;
697 state->sub_level[i] = 0;
698 }
699
700 state->sp_est_cnt = 0;
701 state->sp_max = 0;
702 state->sp_max_cnt = 0;
703 state->speech_level = SPEECH_LEVEL_INIT;
704 state->prev_pow_sum = 0;
705 return 0;
706 }
707
708 /******************************************************************************
709 *
710 * Function: wb_vad_exit
711 * Purpose: The memory used for state memory is freed
712 *
713 *******************************************************************************/
714
/*
 * wb_vad_exit: release the state structure through mem_free() and clear
 * the caller's pointer. Does nothing when state or *state is NULL.
 */
void wb_vad_exit(
    VadVars ** state,                      /* i/o : State structure                  */
    VO_MEM_OPERATOR *pMemOP                /* i   : memory operator passed to mem_free */
)
{
    if (state != NULL && *state != NULL)
    {
        /* deallocate memory */
        mem_free(pMemOP, *state, VO_INDEX_ENC_AMRWB);
        *state = NULL;
    }
}
727
728 /******************************************************************************
729 *
730 * Function : wb_vad_tone_detection
731 * Purpose : Search maximum pitch gain from a frame. Set tone flag if
732 * pitch gain is high. This is used to detect
733 * signaling tones and other signals with high pitch gain.
734 *
735 *******************************************************************************/
736
/*
 * wb_vad_tone_detection: shift the tone flag history in st->tone_flag one
 * position down and set bit 0x4000 when the pitch gain exceeds TONE_THR.
 */
void wb_vad_tone_detection(
    VadVars * st,                          /* i/o : State struct */
    Word16 p_gain                          /* i   : pitch gain   */
)
{
    /* age the history by one step */
    st->tone_flag >>= 1;

    /* high pitch gain: mark the newest entry */
    if (p_gain > TONE_THR)
    {
        st->tone_flag = (Word16) (0x4000 | st->tone_flag);
    }
}
751
752 /******************************************************************************
753 *
754 * Function : wb_vad
755 * Purpose : Main program for Voice Activity Detection (VAD) for AMR
756 *
757 *******************************************************************************/
758
/*
 * wb_vad: run the voice activity detector on one frame of FRAME_LEN
 * samples. Computes the frame power, runs the filter bank and the VAD
 * decision, then updates the speech level estimate from the summed band
 * levels (lowest band excluded).
 */
Word16 wb_vad(                             /* Return value : VAD Decision, 1 = speech, 0 = noise */
    VadVars * st,                          /* i/o : State structure            */
    Word16 in_buf[]                        /* i   : samples of the input frame */
)
{
    Word16 level[COMPLEN];
    Word16 vad_flag, in_level;
    Word32 n;
    Word32 frame_pow, pow_sum, level_sum;

    /* Calculate power of the input frame. */
    frame_pow = 0L;
    for (n = 0; n < FRAME_LEN; n++)
    {
        frame_pow = L_mac(frame_pow, in_buf[n], in_buf[n]);
    }

    /* pow_sum = power of current frame and previous frame */
    pow_sum = L_add(frame_pow, st->prev_pow_sum);

    /* save power of current frame for next call */
    st->prev_pow_sum = frame_pow;

    /* If input power is very low, clear the two newest tone flag bits */
    if (pow_sum < POW_TONE_THR)
    {
        st->tone_flag = (Word16) (st->tone_flag & 0x1fff);
    }

    /* Run the filter bank and calculate signal levels at each band */
    filter_bank(st, in_buf, level);

    /* compute VAD decision */
    vad_flag = vad_decision(st, level, pow_sum);

    /* Calculate input level, ignoring the lowest band */
    level_sum = 0;
    for (n = 1; n < COMPLEN; n++)
    {
        level_sum = vo_L_add(level_sum, level[n]);
    }
    in_level = extract_h(level_sum << 12);

    /* Estimate speech level */
    Estimate_Speech(st, in_level);

    return vad_flag;
}
805
806
807
808
809