1 /*---------------------------------------------------------------------------*
2 * srec_eosd.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20 #include"portable.h"
21 #include"passert.h"
22 #include"srec.h"
23 #include"srec_eosd.h"
24 #include"srec_context.h"
25 #include"word_lattice.h"
26
/*
 * Allocate a zero-initialized end-of-speech detector parameter block, fill
 * it from the given settings and hand it back through *peosd.
 *
 *   peosd                      out: receives the new block, or NULL when the
 *                              allocation fails
 *   eos_costdelta              stored in eos_costdelta
 *   opt_eos_costdelta          stored in opt_eos_costdelta
 *   terminal_timeout           stored in endnode_timeout
 *   optional_terminal_timeout  stored in optendnode_timeout
 *   non_terminal_timeout       stored in internalnode_timeout
 *   max_speech_duration        stored in inspeech_timeout
 *
 * The block is released with srec_eosd_destroy().
 */
void srec_eosd_allocate(srec_eos_detector_parms** peosd,
                        int eos_costdelta,
                        int opt_eos_costdelta,
                        int terminal_timeout,
                        int optional_terminal_timeout,
                        int non_terminal_timeout,
                        int max_speech_duration)
{
  srec_eos_detector_parms* eosd;

  eosd = (srec_eos_detector_parms*)CALLOC(1, sizeof(*eosd), "search.endpointer");
  if (eosd == NULL)
  {
    /* Report the failure to the caller instead of writing through NULL. */
    *peosd = NULL;
    return;
  }

  /* NOTE(review): the two cost deltas are narrowed through frameID just like
     the timeouts; confirm that matches the declared field types. */
  eosd->eos_costdelta = (frameID)eos_costdelta;
  eosd->opt_eos_costdelta = (frameID)opt_eos_costdelta;

  eosd->endnode_timeout = (frameID)terminal_timeout;
  eosd->optendnode_timeout = (frameID)optional_terminal_timeout;
  eosd->internalnode_timeout = (frameID)non_terminal_timeout;
  eosd->inspeech_timeout = (frameID)max_speech_duration;

  *peosd = eosd;
}
45
/*
 * Release an end-of-speech detector parameter block by passing it to FREE.
 * The pointer must not be used after this call.
 * NOTE(review): whether FREE tolerates a NULL argument is not visible here.
 */
void srec_eosd_destroy(srec_eos_detector_parms* eosd)
{
  FREE(eosd);
}
50
/* The current algorithm does not make use of most of the frmcnt counters;
   rather, we look at the eos frame from the final end node search state
   and compare it with the current frame. The new method is less sensitive
   to background noise.

   The 1.9 method had a blatant bug in that we were resetting the
   optendnode_frmcnt when there were no live alternative tokens, i.e.
   xftoken == NULL was causing a reset!
*/
60
/*
 * Put the per-utterance detector state back to its starting values:
 * every frame counter becomes 0 and no internal node is being tracked.
 */
void srec_eosd_state_reset(srec_eos_detector_state* eosd_state)
{
  /* MAXnodeID marks "no node tracked". */
  eosd_state->internalnode_node_index = MAXnodeID;

  /* Frame counters. */
  eosd_state->inspeech_frmcnt = 0;
  eosd_state->internalnode_frmcnt = 0;
  eosd_state->optendnode_frmcnt = 0;
  eosd_state->endnode_frmcnt = 0;
}
69
srec_check_end_of_speech_end(srec * rec)70 EOSrc srec_check_end_of_speech_end(srec* rec)
71 {
72 EOSrc rc = SPEECH_MAYBE_ENDED;
73 return rc;
74 }
75
srec_check_end_of_speech(srec_eos_detector_parms * eosd_parms,srec * rec)76 EOSrc srec_check_end_of_speech(srec_eos_detector_parms* eosd_parms, srec* rec)
77 {
78 nodeID end_node;
79 EOSrc rc = VALID_SPEECH_CONTINUING;
80 bigcostdata eos_cost_margin;
81 bigcostdata opteos_cost_margin;
82 word_token* last_wtoken;
83 int nframes_since_eos;
84
85 fsmnode_token *ftoken, *eftoken, *oeftoken, *xftoken;
86 ftokenID ftoken_index, eftoken_index, oeftoken_index, xftoken_index;
87 costdata wrapup_cost = rec->context->wrapup_cost;
88 srec_eos_detector_state* eosd_state = &rec->eosd_state;
89
90 if (rec->current_search_frame == 1)
91 srec_eosd_state_reset(eosd_state);
92
93 end_node = rec->context->end_node;
94 eftoken_index = rec->best_token_for_node[ end_node];
95 if (eftoken_index != MAXftokenID)
96 eftoken = &rec->fsmnode_token_array[ eftoken_index];
97 else
98 eftoken = NULL;
99
100 xftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
101 if (xftoken_index != MAXftokenID)
102 xftoken = &rec->fsmnode_token_array[ xftoken_index];
103 else
104 xftoken = NULL;
105
106 oeftoken_index = rec->current_best_ftoken_index[NODE_INFO_OPTENDN];
107 if (oeftoken_index != MAXftokenID)
108 oeftoken = &rec->fsmnode_token_array[ oeftoken_index];
109 else
110 oeftoken = NULL;
111
112
113 if (rec->srec_ended)
114 rc = SPEECH_MAYBE_ENDED;
115 else if (rec->current_search_frame >= rec->word_lattice->max_frames - 1
116 || rec->current_search_frame >= eosd_parms->inspeech_timeout)
117 {
118 /* here we will need to differentiate max_frames from
119 num_frames_allocated */
120 if (eftoken_index != MAXftokenID)
121 rc = SPEECH_ENDED;
122 else
123 rc = SPEECH_TOO_LONG;
124 }
125 else
126 {
127
128 /* reset the internal counter? */
129 ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
130 if (ftoken_index != MAXftokenID)
131 {
132 ftoken = &rec->fsmnode_token_array[ ftoken_index];
133 if (eosd_state->internalnode_node_index != ftoken->FSMnode_index)
134 {
135 eosd_state->internalnode_node_index = ftoken->FSMnode_index;
136 eosd_state->internalnode_frmcnt = 1;
137 }
138 else
139 {
140 if (ftoken->word != rec->context->beg_silence_word)
141 eosd_state->internalnode_frmcnt++;
142 }
143 }
144 else
145 {
146 eosd_state->internalnode_frmcnt = 1;
147 eosd_state->internalnode_node_index = MAXnodeID;
148 }
149
150 /* nframes since eos */
151 if (eftoken)
152 {
153 last_wtoken = NULL;
154 if (eftoken->word_backtrace != MAXwtokenID)
155 {
156 last_wtoken = &rec->word_token_array[eftoken->word_backtrace];
157 nframes_since_eos = rec->current_search_frame - last_wtoken->end_time;
158 }
159 else
160 nframes_since_eos = 0;
161 }
162 else
163 nframes_since_eos = 0;
164
165 /* eos cost margin */
166 if (!eftoken)
167 {
168 eos_cost_margin = 0;
169 }
170 else if (!oeftoken && !xftoken)
171 {
172 eos_cost_margin = MAXcostdata;
173 }
174 else if (!oeftoken)
175 {
176 eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
177 }
178 else if (!xftoken)
179 {
180 eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost;
181 }
182 else if (oeftoken->cost > eftoken->cost)
183 {
184 eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
185 }
186 else
187 { /* if(oeftoken->cost < eftoken->cost) */
188 eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost;
189 }
190
191 /* opteos cost margin */
192 if (!eftoken)
193 {
194 opteos_cost_margin = 0;
195 }
196 else if (!oeftoken)
197 {
198 opteos_cost_margin = 0;
199 }
200 else if (!xftoken)
201 {
202 opteos_cost_margin = MAXcostdata;
203 }
204 else
205 {
206 opteos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
207 }
208
209 if (eftoken)
210 {
211 if (oeftoken && nframes_since_eos > eosd_parms->optendnode_timeout
212 && opteos_cost_margin > eosd_parms->eos_costdelta)
213 {
214 rc = SPEECH_ENDED;
215
216 }
217 else if (!oeftoken && nframes_since_eos > eosd_parms->endnode_timeout
218 && eos_cost_margin > eosd_parms->eos_costdelta)
219 {
220 rc = SPEECH_ENDED;
221
222 }
223 else if (nframes_since_eos > eosd_parms->optendnode_timeout
224 && eos_cost_margin > eosd_parms->eos_costdelta)
225 {
226 rc = SPEECH_ENDED;
227
228 }
229 else
230 {
231 rc = VALID_SPEECH_CONTINUING;
232 }
233 }
234
235 /* reached internal timeout, ie at same node for so long? */
236 if (eosd_state->internalnode_frmcnt >= eosd_parms->internalnode_timeout)
237 {
238 /* PLogMessage("eosd_state->internalnode_frmcnt %d eosd_parms->internalnode_timeout %d\n", eosd_state->internalnode_frmcnt, eosd_parms->internalnode_timeout); */
239 ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
240 ftoken = &rec->fsmnode_token_array [ ftoken_index];
241 /* sprintf(buf, "eos rec%d@%d,%d i%d> ", rec->id,
242 rec->current_search_frame, ftoken->FSMnode_index,
243 eosd_state->internalnode_frmcnt);
244 PLogMessage(buf);
245 sprint_word_token_backtrace(buf,sizeof(buf),rec,ftoken->word_backtrace);
246 PLogMessage(" %s\n", buf); */
247 rc = SPEECH_ENDED;
248 }
249 }
250
251 /* the endnode will never win against an optend node because
252 the cost at endnode is the same or worse (even wrapup_cost adjustment) */
253
254
255
256 /* so we need to check for optend nodes separately here
257 but we really need to remember best_optendnode_index, best_endnode_index
258 best_nonendnode_index */
259 return rc;
260 }
261