1 /* ------------------------------------------------------------------
2 * Copyright (C) 1998-2009 PacketVideo
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13 * express or implied.
14 * See the License for the specific language governing permissions
15 * and limitations under the License.
16 * -------------------------------------------------------------------
17 */
18 /****************************************************************************************
19 Portions of this file are derived from the following 3GPP standard:
20
21 3GPP TS 26.073
22 ANSI-C code for the Adaptive Multi-Rate (AMR) speech codec
23 Available from http://www.3gpp.org
24
25 (C) 2004, 3GPP Organizational Partners (ARIB, ATIS, CCSA, ETSI, TTA, TTC)
26 Permission to distribute, modify and use this file under the standard license
27 terms listed above has been obtained from the copyright holder.
28 ****************************************************************************************/
29 /*
30 ------------------------------------------------------------------------------
31
32 Pathname: ./audio/gsm-amr/c/src/bgnscd.c
33 Functions:
34 Bgn_scd_reset
35 Bgn_scd
36
37 ------------------------------------------------------------------------------
38 MODULE DESCRIPTION
39
40 Background noise source characteristic detector (SCD)
41
42 ------------------------------------------------------------------------------
43 */
44
45
46 /*----------------------------------------------------------------------------
47 ; INCLUDES
48 ----------------------------------------------------------------------------*/
49 #include <string.h>
50
51 #include "bgnscd.h"
52 #include "typedef.h"
53 #include "basic_op.h"
54 #include "cnst.h"
55 #include "copy.h"
56 #include "gmed_n.h"
57 #include "sqrt_l.h"
58
59 /*----------------------------------------------------------------------------
60 ; MACROS
61 ; Define module specific macros here
62 ----------------------------------------------------------------------------*/
63
64
65 /*----------------------------------------------------------------------------
66 ; DEFINES
67 ; Include all pre-processor statements here. Include conditional
68 ; compile variables also.
69 ----------------------------------------------------------------------------*/
70 #define TRUE 1
71 #define FALSE 0
72
73 /*----------------------------------------------------------------------------
74 ; LOCAL FUNCTION DEFINITIONS
75 ; Function Prototype declaration
76 ----------------------------------------------------------------------------*/
77
78 /*----------------------------------------------------------------------------
79 ; LOCAL VARIABLE DEFINITIONS
80 ; Variable declaration - defined here and used outside this module
81 ----------------------------------------------------------------------------*/
82
83
84 /*
85 ------------------------------------------------------------------------------
86 FUNCTION NAME: Bgn_scd_reset
87 ------------------------------------------------------------------------------
88 INPUT AND OUTPUT DEFINITIONS
89
90 Inputs:
91 state = points to memory of type Bgn_scdState.
92
93 Outputs:
94 The memory of type Bgn_scdState pointed to by state is set to all
95 zeros.
96
97 Returns:
98 Returns 0 if memory was successfully initialized,
99 otherwise returns -1.
100
101 Global Variables Used:
102 None.
103
104 Local Variables Needed:
105 None.
106
107 ------------------------------------------------------------------------------
108 FUNCTION DESCRIPTION
109
110 Resets state memory.
111
112 ------------------------------------------------------------------------------
113 REQUIREMENTS
114
115 None
116
117 ------------------------------------------------------------------------------
118 REFERENCES
119
120 bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
121
122 ------------------------------------------------------------------------------
123 PSEUDO-CODE
124
125 Word16 Bgn_scd_reset (Bgn_scdState *state)
126 {
127 if (state == (Bgn_scdState *) NULL){
128 fprintf(stderr, "Bgn_scd_reset: invalid parameter\n");
129 return -1;
130 }
131
132 // Static vectors to zero
133 Set_zero (state->frameEnergyHist, L_ENERGYHIST);
134
135 // Initialize hangover handling
136 state->bgHangover = 0;
137
138 return 0;
139 }
140
141 ------------------------------------------------------------------------------
142 RESOURCES USED [optional]
143
144 When the code is written for a specific target processor,
145 the resources used should be documented below.
146
147 HEAP MEMORY USED: x bytes
148
149 STACK MEMORY USED: x bytes
150
151 CLOCK CYCLES: (cycle count equation for this function) + (variable
152 used to represent cycle count for each subroutine
153 called)
154 where: (cycle count variable) = cycle count for [subroutine
155 name]
156
157 ------------------------------------------------------------------------------
158 CAUTION [optional]
159 [State any special notes, constraints or cautions for users of this function]
160
161 ------------------------------------------------------------------------------
162 */
163
Bgn_scd_reset(Bgn_scdState * state)164 Word16 Bgn_scd_reset(Bgn_scdState *state)
165 {
166 if (state == (Bgn_scdState *) NULL)
167 {
168 /* fprintf(stderr, "Bgn_scd_reset: invalid parameter\n"); */
169 return(-1);
170 }
171
172 /* Static vectors to zero */
173 memset(state->frameEnergyHist, 0, L_ENERGYHIST*sizeof(Word16));
174
175 /* Initialize hangover handling */
176 state->bgHangover = 0;
177
178 return(0);
179 }
180
181 /****************************************************************************/
182
183 /*
184 ------------------------------------------------------------------------------
185 FUNCTION NAME: Bgn_scd
186 ------------------------------------------------------------------------------
187 INPUT AND OUTPUT DEFINITIONS
188
189 Inputs:
190 st = pointer to state variables of type Bgn_scdState
191 ltpGainHist[] = LTP gain history (Word16)
192 speech[] = synthesis speech frame (Word16)
193 voicedHangover = pointer to # of frames after last voiced frame (Word16)
194 pOverflow = pointer to overflow indicator (Flag)
195
196 Outputs:
197 st = function updates the state variables of type Bgn_scdState
198 pointed to by st.
199 voicedHangover = function updates the # of frames after last voiced
200 frame pointed to by voicedHangover.
201 pOverflow = 1 if the basic math function L_add() results in saturation.
202 else pOverflow is zero.
203
204 Returns:
205 inbgNoise = flag if background noise is present (Word16)
206
207 Global Variables Used:
208 None.
209
210 Local Variables Needed:
211 None.
212
213 ------------------------------------------------------------------------------
214 FUNCTION DESCRIPTION
215
216 Characterize synthesis speech and detect background noise.
217
218 ------------------------------------------------------------------------------
219 REQUIREMENTS
220
221 None
222
223 ------------------------------------------------------------------------------
224 REFERENCES
225
226 bgnscd.c, UMTS GSM AMR speech codec, R99 - Version 3.2.0, March 2, 2001
227
228 ------------------------------------------------------------------------------
229 PSEUDO-CODE
230
231 Word16 Bgn_scd (Bgn_scdState *st, // i : State variables for bgn SCD
232 Word16 ltpGainHist[], // i : LTP gain history
233 Word16 speech[], // o : synthesis speech frame
234 Word16 *voicedHangover // o : # of frames after last
235 voiced frame
236 )
237 {
238 Word16 i;
239 Word16 prevVoiced, inbgNoise;
240 Word16 temp;
241 Word16 ltpLimit, frameEnergyMin;
242 Word16 currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart;
243 Word32 s;
244
245 // Update the inBackgroundNoise flag (valid for use in next frame if BFI)
246 // it now works as a energy detector floating on top
247 // not as good as a VAD.
248
249 currEnergy = 0;
250 s = (Word32) 0;
251
252 for (i = 0; i < L_FRAME; i++)
253 {
254 s = L_mac (s, speech[i], speech[i]);
255 }
256
257 s = L_shl(s, 2);
258
259 currEnergy = extract_h (s);
260
261 frameEnergyMin = 32767;
262
263 for (i = 0; i < L_ENERGYHIST; i++)
264 {
265 if (sub(st->frameEnergyHist[i], frameEnergyMin) < 0)
266 frameEnergyMin = st->frameEnergyHist[i];
267 }
268
269 noiseFloor = shl (frameEnergyMin, 4); // Frame Energy Margin of 16
270
271 maxEnergy = st->frameEnergyHist[0];
272 for (i = 1; i < L_ENERGYHIST-4; i++)
273 {
274 if ( sub (maxEnergy, st->frameEnergyHist[i]) < 0)
275 {
276 maxEnergy = st->frameEnergyHist[i];
277 }
278 }
279
280 maxEnergyLastPart = st->frameEnergyHist[2*L_ENERGYHIST/3];
281 for (i = 2*L_ENERGYHIST/3+1; i < L_ENERGYHIST; i++)
282 {
283 if ( sub (maxEnergyLastPart, st->frameEnergyHist[i] ) < 0)
284 {
285 maxEnergyLastPart = st->frameEnergyHist[i];
286 }
287 }
288
289 inbgNoise = 0; // false
290
291 // Do not consider silence as noise
292 // Do not consider continuous high volume as noise
293 // Or if the current noise level is very low
294 // Mark as noise if under current noise limit
295 // OR if the maximum energy is below the upper limit
296
297 if ( (sub(maxEnergy, LOWERNOISELIMIT) > 0) &&
298 (sub(currEnergy, FRAMEENERGYLIMIT) < 0) &&
299 (sub(currEnergy, LOWERNOISELIMIT) > 0) &&
300 ( (sub(currEnergy, noiseFloor) < 0) ||
301 (sub(maxEnergyLastPart, UPPERNOISELIMIT) < 0)))
302 {
303 if (sub(add(st->bgHangover, 1), 30) > 0)
304 {
305 st->bgHangover = 30;
306 } else
307 {
308 st->bgHangover = add(st->bgHangover, 1);
309 }
310 }
311 else
312 {
313 st->bgHangover = 0;
314 }
315
316 // make final decision about frame state, act somewhat cautiously
317 if (sub(st->bgHangover,1) > 0)
318 inbgNoise = 1; // true
319
320 for (i = 0; i < L_ENERGYHIST-1; i++)
321 {
322 st->frameEnergyHist[i] = st->frameEnergyHist[i+1];
323 }
324 st->frameEnergyHist[L_ENERGYHIST-1] = currEnergy;
325
326 // prepare for voicing decision; tighten the threshold after some
327 time in noise
328 ltpLimit = 13926; // 0.85 Q14
329 if (sub(st->bgHangover, 8) > 0)
330 {
331 ltpLimit = 15565; // 0.95 Q14
332 }
333 if (sub(st->bgHangover, 15) > 0)
334 {
335 ltpLimit = 16383; // 1.00 Q14
336 }
337
338 // weak sort of voicing indication.
339 prevVoiced = 0; // false
340
341 if (sub(gmed_n(&ltpGainHist[4], 5), ltpLimit) > 0)
342 {
343 prevVoiced = 1; // true
344 }
345 if (sub(st->bgHangover, 20) > 0) {
346 if (sub(gmed_n(ltpGainHist, 9), ltpLimit) > 0)
347 {
348 prevVoiced = 1; // true
349 }
350 else
351 {
352 prevVoiced = 0; // false
353 }
354 }
355
356 if (prevVoiced)
357 {
358 *voicedHangover = 0;
359 }
360 else
361 {
362 temp = add(*voicedHangover, 1);
363 if (sub(temp, 10) > 0)
364 {
365 *voicedHangover = 10;
366 }
367 else
368 {
369 *voicedHangover = temp;
370 }
371 }
372
373 return inbgNoise;
374 }
375
376 ------------------------------------------------------------------------------
377 RESOURCES USED [optional]
378
379 When the code is written for a specific target processor,
380 the resources used should be documented below.
381
382 HEAP MEMORY USED: x bytes
383
384 STACK MEMORY USED: x bytes
385
386 CLOCK CYCLES: (cycle count equation for this function) + (variable
387 used to represent cycle count for each subroutine
388 called)
389 where: (cycle count variable) = cycle count for [subroutine
390 name]
391
392 ------------------------------------------------------------------------------
393 CAUTION [optional]
394 [State any special notes, constraints or cautions for users of this function]
395
396 ------------------------------------------------------------------------------
397 */
398
/*
 * Bgn_scd - background noise source characteristic detector.
 *
 * Measures the energy of the current synthesis frame, compares it with the
 * energy history kept in st, and maintains the background-noise hangover
 * counter st->bgHangover.  The current energy is then appended to the
 * history.  Finally a weak voicing indication is derived from the LTP gain
 * history (via gmed_n()) and *voicedHangover is reset or advanced.
 *
 *   st             (i/o) detector state: frameEnergyHist[] and bgHangover
 *   ltpGainHist    (i)   LTP gain history; handed to gmed_n() as entries
 *                        4..8 and, after more than 20 noise frames, 0..8
 *   speech         (i)   synthesis speech frame, L_FRAME samples; only read
 *   voicedHangover (i/o) # of frames after the last voiced frame, capped
 *                        at 10
 *   pOverflow      (o)   overflow indicator forwarded to L_add() while the
 *                        frame energy is accumulated
 *
 * Returns TRUE when st->bgHangover is greater than 1 after the update,
 * FALSE otherwise.
 */
Word16 Bgn_scd(Bgn_scdState *st,       /* i/o: State variables for bgn SCD */
               Word16 ltpGainHist[],   /* i  : LTP gain history            */
               Word16 speech[],        /* i  : synthesis speech frame      */
               Word16 *voicedHangover, /* i/o: # of frames after last
                                               voiced frame                */
               Flag *pOverflow         /* o  : overflow indicator for
                                               L_add()                     */
              )
{
    Word16 i;
    Word16 prevVoiced, inbgNoise;
    Word16 ltpLimit, frameEnergyMin;
    Word16 currEnergy, noiseFloor, maxEnergy, maxEnergyLastPart;
    Word32 s, L_temp;

    /* Update the inBackgroundNoise flag (valid for use in next frame if BFI) */
    /* it now works as a energy detector floating on top                      */
    /* not as good as a VAD.                                                  */

    /* Accumulate 2 * speech[i]^2 with saturation.  The only product whose
       doubling does not fit in 32 bits is (-32768)^2 == 0x40000000, which
       is replaced by MAX_32. */
    s = (Word32) 0;

    for (i = L_FRAME - 1; i >= 0; i--)
    {
        L_temp = ((Word32) speech[i]) * speech[i];
        if (L_temp != (Word32) 0x40000000L)
        {
            L_temp = L_temp << 1;
        }
        else
        {
            L_temp = MAX_32;
        }
        s = L_add(s, L_temp, pOverflow);
    }

    /* Equivalent to a saturating (s << 2) followed by taking the upper
       16 bits.  s is a sum of squares, so don't need to check for neg
       overflow. */
    if (s > (Word32) 0x1fffffffL)
    {
        currEnergy = MAX_16;
    }
    else
    {
        currEnergy = (Word16)(s >> 14);
    }

    /* Smallest energy over the whole history */
    frameEnergyMin = 32767;
    for (i = L_ENERGYHIST - 1; i >= 0; i--)
    {
        if (st->frameEnergyHist[i] < frameEnergyMin)
        {
            frameEnergyMin = st->frameEnergyHist[i];
        }
    }

    /* Frame Energy Margin of 16, saturated to the Word16 range.  A multiply
       is used instead of a left shift because shifting a negative value is
       undefined behaviour in C; the result is range-checked explicitly
       instead of relying on a narrowing cast. */
    L_temp = (Word32) frameEnergyMin * 16;
    if (L_temp > (Word32) MAX_16)
    {
        noiseFloor = MAX_16;
    }
    else if (L_temp < (Word32) MIN_16)
    {
        noiseFloor = MIN_16;
    }
    else
    {
        noiseFloor = (Word16) L_temp;
    }

    /* Largest energy over history entries 0 .. L_ENERGYHIST-5 */
    maxEnergy = st->frameEnergyHist[0];
    for (i = L_ENERGYHIST - 5; i >= 1; i--)
    {
        if (maxEnergy < st->frameEnergyHist[i])
        {
            maxEnergy = st->frameEnergyHist[i];
        }
    }

    /* Largest energy over the last third of the history */
    maxEnergyLastPart = st->frameEnergyHist[2 * L_ENERGYHIST / 3];
    for (i = 2 * L_ENERGYHIST / 3 + 1; i < L_ENERGYHIST; i++)
    {
        if (maxEnergyLastPart < st->frameEnergyHist[i])
        {
            maxEnergyLastPart = st->frameEnergyHist[i];
        }
    }

    /* Do not consider silence as noise                   */
    /* Do not consider continuous high volume as noise    */
    /* Or if the current noise level is very low          */
    /* Mark as noise if under current noise limit         */
    /* OR if the maximum energy is below the upper limit  */

    if ((maxEnergy > LOWERNOISELIMIT) &&
            (currEnergy < FRAMEENERGYLIMIT) &&
            (currEnergy > LOWERNOISELIMIT) &&
            ((currEnergy < noiseFloor) ||
             (maxEnergyLastPart < UPPERNOISELIMIT)))
    {
        /* Count consecutive noise-like frames, capped at 30 */
        if (st->bgHangover >= 30)
        {
            st->bgHangover = 30;
        }
        else
        {
            st->bgHangover += 1;
        }
    }
    else
    {
        st->bgHangover = 0;
    }

    /* make final decision about frame state, act somewhat cautiously */
    inbgNoise = (st->bgHangover > 1) ? TRUE : FALSE;

    /* Slide the energy history by one frame and append the current energy */
    for (i = 0; i < L_ENERGYHIST - 1; i++)
    {
        st->frameEnergyHist[i] = st->frameEnergyHist[i + 1];
    }
    st->frameEnergyHist[L_ENERGYHIST - 1] = currEnergy;

    /* prepare for voicing decision; tighten the threshold after some
       time in noise */
    if (st->bgHangover > 15)
    {
        ltpLimit = 16383;  /* 1.00 Q14 */
    }
    else if (st->bgHangover > 8)
    {
        ltpLimit = 15565;  /* 0.95 Q14 */
    }
    else
    {
        ltpLimit = 13926;  /* 0.85 Q14 */
    }

    /* weak sort of voicing indication: first judged on the five most
       recent LTP gains (entries 4..8) */
    prevVoiced = FALSE;

    if (gmed_n(&ltpGainHist[4], 5) > ltpLimit)
    {
        prevVoiced = TRUE;
    }

    /* After more than 20 noise frames the decision is taken from all nine
       history entries instead, overriding the result above. */
    if (st->bgHangover > 20)
    {
        if (gmed_n(ltpGainHist, 9) > ltpLimit)
        {
            prevVoiced = TRUE;
        }
        else
        {
            prevVoiced = FALSE;
        }
    }

    if (prevVoiced)
    {
        *voicedHangover = 0;
    }
    else if (*voicedHangover >= 10)
    {
        /* Clamp before incrementing so the counter can never wrap, even
           if the caller hands in an out-of-range value. */
        *voicedHangover = 10;
    }
    else
    {
        *voicedHangover += 1;
    }

    return inbgNoise;
}
590