• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  srec_eosd.c  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include"portable.h"
21 #include"passert.h"
22 #include"srec.h"
23 #include"srec_eosd.h"
24 #include"srec_context.h"
25 #include"word_lattice.h"
26 
/*
 * Allocates a zero-initialised end-of-speech detector parameter block and
 * fills it from the arguments.
 *
 *   peosd                      out: receives the new block, or a null pointer
 *                              if the allocation failed
 *   eos_costdelta              stored in eos_costdelta
 *   opt_eos_costdelta          stored in opt_eos_costdelta
 *   terminal_timeout           stored in endnode_timeout
 *   optional_terminal_timeout  stored in optendnode_timeout
 *   non_terminal_timeout       stored in internalnode_timeout
 *   max_speech_duration        stored in inspeech_timeout
 *
 * The timeouts are compared against frame counts by
 * srec_check_end_of_speech(), so they are expressed in frames.
 *
 * Every value is cast to frameID before it is stored.
 * NOTE(review): that includes the two cost deltas; confirm the cast matches
 * the declared field types in srec_eosd.h.
 *
 * The block is handed back to the allocator with srec_eosd_destroy().
 */
void srec_eosd_allocate(srec_eos_detector_parms** peosd,
                        int eos_costdelta,
                        int opt_eos_costdelta,
                        int terminal_timeout,
                        int optional_terminal_timeout,
                        int non_terminal_timeout,
                        int max_speech_duration)
{
  srec_eos_detector_parms* eosd;

  eosd = (srec_eos_detector_parms*)CALLOC(1, sizeof(*eosd), "search.endpointer");

  /* Only touch the block if the allocator actually returned one; the
     original code dereferenced the result unconditionally.
     NOTE(review): assumes CALLOC reports failure with a null pointer. */
  if (eosd)
  {
    eosd->eos_costdelta        = (frameID)eos_costdelta;
    eosd->opt_eos_costdelta    = (frameID)opt_eos_costdelta;
    eosd->endnode_timeout      = (frameID)terminal_timeout;
    eosd->optendnode_timeout   = (frameID)optional_terminal_timeout;
    eosd->internalnode_timeout = (frameID)non_terminal_timeout;
    eosd->inspeech_timeout     = (frameID)max_speech_duration;
  }

  /* Always publish the result so the caller never sees a stale pointer. */
  *peosd = eosd;
}
45 
/*
 * Hands an end-of-speech detector parameter block to FREE().
 * Counterpart of srec_eosd_allocate(); the pointer must not be used
 * afterwards.
 */
void srec_eosd_destroy(srec_eos_detector_parms* eosd)
{
  FREE(eosd);
}
50 
51 /* The current algorithm does not make use of most of the frmcnt counters;
52    rather, we look at the eos frame from the final end node search state
53    and compare it with the current frame.  The new method is less sensitive
54    to background noise.
55 
56    The 1.9 method had a blatant bug in that we were resetting the
57    optendnode_frmcnt when there were no live alternative tokens, i.e.
58    xftoken == NULL was causing a reset!
59 */
60 
/*
 * Returns an end-of-speech detector state to its starting values: all four
 * frame counters become 0 and the tracked internal node becomes MAXnodeID.
 */
void srec_eosd_state_reset(srec_eos_detector_state* eosd_state)
{
  eosd_state->internalnode_node_index = MAXnodeID;

  eosd_state->inspeech_frmcnt =
    eosd_state->internalnode_frmcnt =
      eosd_state->optendnode_frmcnt =
        eosd_state->endnode_frmcnt = 0;
}
69 
srec_check_end_of_speech_end(srec * rec)70 EOSrc srec_check_end_of_speech_end(srec* rec)
71 {
72   EOSrc rc = SPEECH_MAYBE_ENDED;
73   return rc;
74 }
75 
srec_check_end_of_speech(srec_eos_detector_parms * eosd_parms,srec * rec)76 EOSrc srec_check_end_of_speech(srec_eos_detector_parms* eosd_parms, srec* rec)
77 {
78   nodeID end_node;
79   EOSrc rc = VALID_SPEECH_CONTINUING;
80   bigcostdata eos_cost_margin;
81   bigcostdata opteos_cost_margin;
82   word_token* last_wtoken;
83   int nframes_since_eos;
84 
85   fsmnode_token *ftoken, *eftoken, *oeftoken, *xftoken;
86   ftokenID ftoken_index, eftoken_index, oeftoken_index, xftoken_index;
87   costdata wrapup_cost = rec->context->wrapup_cost;
88   srec_eos_detector_state* eosd_state = &rec->eosd_state;
89 
90   if (rec->current_search_frame == 1)
91     srec_eosd_state_reset(eosd_state);
92 
93   end_node = rec->context->end_node;
94   eftoken_index = rec->best_token_for_node[ end_node];
95   if (eftoken_index != MAXftokenID)
96     eftoken = &rec->fsmnode_token_array[ eftoken_index];
97   else
98     eftoken = NULL;
99 
100   xftoken_index  = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
101   if (xftoken_index != MAXftokenID)
102     xftoken = &rec->fsmnode_token_array[ xftoken_index];
103   else
104     xftoken = NULL;
105 
106   oeftoken_index = rec->current_best_ftoken_index[NODE_INFO_OPTENDN];
107   if (oeftoken_index != MAXftokenID)
108     oeftoken = &rec->fsmnode_token_array[ oeftoken_index];
109   else
110     oeftoken = NULL;
111 
112 
113   if (rec->srec_ended)
114     rc = SPEECH_MAYBE_ENDED;
115   else if (rec->current_search_frame >= rec->word_lattice->max_frames - 1
116            || rec->current_search_frame >= eosd_parms->inspeech_timeout)
117   {
118     /* here we will need to differentiate max_frames from
119        num_frames_allocated */
120     if (eftoken_index != MAXftokenID)
121       rc = SPEECH_ENDED;
122     else
123       rc = SPEECH_TOO_LONG;
124   }
125   else
126   {
127 
128     /* reset the internal counter? */
129     ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
130     if (ftoken_index != MAXftokenID)
131     {
132       ftoken = &rec->fsmnode_token_array[ ftoken_index];
133       if (eosd_state->internalnode_node_index != ftoken->FSMnode_index)
134       {
135         eosd_state->internalnode_node_index = ftoken->FSMnode_index;
136         eosd_state->internalnode_frmcnt = 1;
137       }
138       else
139       {
140         if (ftoken->word != rec->context->beg_silence_word)
141           eosd_state->internalnode_frmcnt++;
142       }
143     }
144     else
145     {
146       eosd_state->internalnode_frmcnt = 1;
147       eosd_state->internalnode_node_index = MAXnodeID;
148     }
149 
150     /* nframes since eos */
151     if (eftoken)
152     {
153       last_wtoken = NULL;
154       if (eftoken->word_backtrace != MAXwtokenID)
155       {
156         last_wtoken = &rec->word_token_array[eftoken->word_backtrace];
157         nframes_since_eos = rec->current_search_frame - last_wtoken->end_time;
158       }
159       else
160         nframes_since_eos = 0;
161     }
162     else
163       nframes_since_eos = 0;
164 
165     /* eos cost margin */
166     if (!eftoken)
167     {
168       eos_cost_margin = 0;
169     }
170     else if (!oeftoken && !xftoken)
171     {
172       eos_cost_margin = MAXcostdata;
173     }
174     else if (!oeftoken)
175     {
176       eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
177     }
178     else if (!xftoken)
179     {
180       eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost;
181     }
182     else if (oeftoken->cost > eftoken->cost)
183     {
184       eos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
185     }
186     else
187     { /* if(oeftoken->cost < eftoken->cost) */
188       eos_cost_margin = oeftoken->cost + wrapup_cost - eftoken->cost;
189     }
190 
191     /* opteos cost margin */
192     if (!eftoken)
193     {
194       opteos_cost_margin = 0;
195     }
196     else if (!oeftoken)
197     {
198       opteos_cost_margin = 0;
199     }
200     else if (!xftoken)
201     {
202       opteos_cost_margin = MAXcostdata;
203     }
204     else
205     {
206       opteos_cost_margin = xftoken->cost + wrapup_cost - eftoken->cost;
207     }
208 
209     if (eftoken)
210     {
211       if (oeftoken && nframes_since_eos > eosd_parms->optendnode_timeout
212           && opteos_cost_margin > eosd_parms->eos_costdelta)
213       {
214         rc = SPEECH_ENDED;
215 
216       }
217       else if (!oeftoken && nframes_since_eos > eosd_parms->endnode_timeout
218                && eos_cost_margin > eosd_parms->eos_costdelta)
219       {
220         rc = SPEECH_ENDED;
221 
222       }
223       else if (nframes_since_eos > eosd_parms->optendnode_timeout
224                && eos_cost_margin > eosd_parms->eos_costdelta)
225       {
226         rc = SPEECH_ENDED;
227 
228       }
229       else
230       {
231         rc = VALID_SPEECH_CONTINUING;
232       }
233     }
234 
235     /* reached internal timeout, ie at same node for so long? */
236     if (eosd_state->internalnode_frmcnt >= eosd_parms->internalnode_timeout)
237     {
238       /* PLogMessage("eosd_state->internalnode_frmcnt %d eosd_parms->internalnode_timeout %d\n", eosd_state->internalnode_frmcnt, eosd_parms->internalnode_timeout); */
239       ftoken_index = rec->current_best_ftoken_index[NODE_INFO_REGULAR];
240       ftoken = &rec->fsmnode_token_array [ ftoken_index];
241       /* sprintf(buf, "eos rec%d@%d,%d i%d> ", rec->id,
242       rec->current_search_frame, ftoken->FSMnode_index,
243       eosd_state->internalnode_frmcnt);
244       PLogMessage(buf);
245       sprint_word_token_backtrace(buf,sizeof(buf),rec,ftoken->word_backtrace);
246       PLogMessage(" %s\n", buf); */
247       rc = SPEECH_ENDED;
248     }
249   }
250 
251   /* the endnode will never win against an optend node because
252      the cost at endnode is the same or worse (even wrapup_cost adjustment) */
253 
254 
255 
256   /* so we need to check for optend nodes separately here
257      but we really need to remember best_optendnode_index, best_endnode_index
258      best_nonendnode_index */
259   return rc;
260 }
261