1 /*---------------------------------------------------------------------------*
2 * voicing.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20
21
22 #include <stdlib.h>
23 #include <string.h>
24 #include <math.h>
25 #include <assert.h>
26
27 #include <limits.h>
28 #ifndef _RTT
29 #include <stdio.h>
30 #endif
31
32 #include "all_defs.h"
33 #include "voicing.h"
34
35 #include "portable.h"
36
37 #include "../cfront/sh_down.h"
38
/* Compile-time trace switch: when non-zero, the "#if DEBUG" sections in
 * init_voicing_analysis() and voicing_analysis() call log_report().
 */
39 #define DEBUG 0
40
41
/* Revision-control "$Id$" keyword string for this file. It is not referenced
 * by any code visible in this file.
 */
42 static const char voicing[] = "$Id: voicing.c,v 1.1.10.5 2007/10/15 18:06:24 dahan Exp $";
43
44
/*
 * Reset the run-time state of a voicing tracker.
 *
 * chan   tracker to reset; must not be NULL (it is dereferenced directly).
 *
 * The frame counter is set to -1, which is the value voicing_analysis()
 * tests for (count < 0) to re-seed its background levels from the next
 * frame.  The three consecutive-frame counters are cleared.  No other
 * field of chan is modified here.
 */
void init_voicing_analysis(voicing_info *chan)
{
  /* no consecutive fast-match / speech / silence frames seen yet */
  chan->fast_count = 0;
  chan->speech_count = 0;
  chan->sil_count = 0;

  /* a negative count marks the tracker as not yet seeded */
  chan->count = -1;

#if DEBUG
  log_report("U: 255 255 255 -1 -1 -1 -1\n");
#endif
}
56
/*
 * Update the voicing tracker with one energy value and classify the frame.
 *
 * chan    tracker state; read and updated in place, must not be NULL.
 * enval   energy value for the current frame.
 * log     not used by this function.
 *
 * The level fields b0, b1 and s0 are compared against SHIFT_UP(enval, 8),
 * i.e. they are kept at 256 times the scale of enval.
 *
 * Returns a bit mask, which is also stored in chan->voice_status:
 *   FAST_VOICE_BIT       fast_count exceeds voice_duration;
 *   QUIET_BIT            otherwise, when sil_count exceeds quiet_duration;
 *   VOICE_BIT            OR-ed in when speech_count exceeds voice_duration;
 *   BELOW_THRESHOLD_BIT  OR-ed in when sil_count is greater than zero.
 */
long voicing_analysis(voicing_info *chan, voicedata enval, int *log)
{
  long flags;
  int gate;

  /* First frame after a reset: seed both background levels from the input.
   * NOTE(review): s0 is not re-seeded here, so it keeps whatever value it
   * already had - confirm that this is intended.
   */
  if (chan->count < 0) {
    chan->b0 = SHIFT_UP(enval, 8);
    chan->b1 = SHIFT_UP(enval, 8);
    chan->count = -1;
  }

  /* Background level b0: drops immediately to any lower input, which also
   * restarts the hang counter.
   */
  if (SHIFT_UP(enval, 8) < chan->b0) {
    chan->b0 = SHIFT_UP(enval, 8);
    chan->count = 0;
  }

  /* Once count has passed a hang limit, b0 is raised by the matching rate. */
  if (chan->count > B0_HANG2) {
    chan->b0 += B0_RATE2;
  } else if (chan->count > B0_HANG1) {
    chan->b0 += B0_RATE1;
  }

  chan->count += 1;

  /* Second background level b1: moved towards the input by B1_RATE/256 of
   * the difference, but only while the input, less quiet_margin, is below b0.
   * NOTE(review): b0 is scaled down with a raw ">> 8" here rather than
   * SHIFT_DOWN - confirm the two agree for negative values.
   */
  if ((chan->b0 >> 8) > (enval - chan->quiet_margin)) {
    chan->b1 += SHIFT_DOWN(B1_RATE * (SHIFT_UP(enval, 8) - chan->b1), 8);
  }

  /* Speech level s0: jumps up to a higher input, otherwise decays by
   * B0_RATE1 per frame.
   */
  if (chan->s0 < SHIFT_UP(enval, 8)) {
    chan->s0 = SHIFT_UP(enval, 8);
  } else {
    chan->s0 -= B0_RATE1;
  }

  /* Decision threshold, scaled back down by 8 bits: b1 plus one quarter of
   * the amount by which (s0 - b0) exceeds DYNAMIC_RANGE, never less than b1.
   */
  gate = (chan->b1
          + SHIFT_DOWN(MAX(chan->s0 - chan->b0 - DYNAMIC_RANGE, 0), 2)) >> 8;

  /* Each counter holds the length of the current run of frames passing its
   * test and is cleared by the first frame that fails it.
   */
  chan->speech_count = (enval > (gate + chan->margin))
                       ? chan->speech_count + 1 : 0;

  chan->fast_count = (enval > (gate + chan->fast_margin))
                     ? chan->fast_count + 1 : 0;

  chan->sil_count = (enval <= (gate + chan->quiet_margin))
                    ? chan->sil_count + 1 : 0;

  /* Build the result flags.  The fast-match test takes precedence over the
   * quiet test; both run lengths are compared against voice_duration.
   */
  flags = 0L;

  if (chan->fast_count > chan->voice_duration) {
    flags = FAST_VOICE_BIT;
  } else if (chan->sil_count > chan->quiet_duration) {
    flags = QUIET_BIT;
  }

  if (chan->speech_count > chan->voice_duration) {
    flags |= VOICE_BIT;
  }

  if (chan->sil_count > 0) {
    flags |= BELOW_THRESHOLD_BIT;
  }

  chan->voice_status = flags;

#if DEBUG
  log_report("U: %d %.1f %.1f, %d %d %d %d\n", (int) enval,
             chan->b0 / 256.0, chan->b1 / 256.0,
             chan->speech_count, chan->fast_count,
             chan->sil_count, chan->count);
#endif

  return flags;
}
144