1 /*---------------------------------------------------------------------------*
2 * srec.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20 #include <stdlib.h>
21 #include <string.h>
22 #include <math.h>
23
24 #include "pstdio.h"
25 #include "passert.h"
26 #include "portable.h"
27
28 #include "hmm_desc.h"
29 #include "utteranc.h"
30 #include "hmmlib.h"
31 #include "srec_sizes.h"
32 #include "search_network.h"
33 #include "srec_context.h"
34 #include "srec.h"
35 #include "srec_stats.h"
36 #include "srec_debug.h"
37 #include "srec_tokens.h"
38 #include "word_lattice.h"
39 #include "swimodel.h"
40 #if USE_COMP_STATS
41 #include "comp_stats.h"
42 #endif
43 #include "c42mul.h"
44
45 #ifdef SET_RCSID
46 static const char *rcsid = 0 ? (const char *) &rcsid :
47 "$Id: srec.c,v 1.39.4.31 2008/06/23 17:20:39 dahan Exp $";
48 #endif
49
50 #define SILENCE_MODEL_INDEX 0
51 #define PRUNE_TIGHTEN 0.9 /*if we run out of room in the state arrays,
52 keep multiplying pruning thresh by this amount
53 until there is room */
54
55 /*--------------------------------------------------------------------------*
56 * *
57 * protos *
58 * *
59 *--------------------------------------------------------------------------*/
60 static void reset_cost_offsets(multi_srec* rec, frameID current_search_frame,
61 costdata current_best_cost);
62 static void update_internal_hmm_states(srec *rec, costdata *pcurrent_prune_delta,
63 costdata *pcurrent_best_cost,
64 costdata *precomputed_model_scores);
65
66 /*--------------------------------------------------------------------------*
67 * *
68 * utils *
69 * *
70 *--------------------------------------------------------------------------*/
71
dump_core(char * msg)72 static void dump_core(char *msg)
73 {
74 PLogError ( msg );
75 ASSERT(0);
76 }
77
reprune_altword_token_batch(srec * rec,altword_token * batch,bigcostdata costlimit)78 static altword_token* reprune_altword_token_batch(srec* rec,
79 altword_token* batch,
80 bigcostdata costlimit)
81 {
82 altword_token *awtoken, *next_awtoken;
83 altword_token **awtokenp;
84
85 /* look for previous invalidate, see below */
86 if (batch->costbasis == MAXcostdata / 2)
87 { /* was costdelta // costlimit */
88 free_altword_token(rec, batch);
89 return AWTNULL;
90 }
91 /* a flag to check whether we already pruned this batch would be nice */
92
93 /* first prune the CDR of the list, ie everything but the head */
94 awtokenp = &batch->next_token;
95 for (awtoken = batch->next_token; awtoken != AWTNULL; awtoken = next_awtoken)
96 {
97 next_awtoken = awtoken->next_token;
98 if ((bigcostdata)batch->costbasis + awtoken->costdelta > costlimit)
99 {
100 (*awtokenp) = awtoken->next_token;
101 awtoken->refcount = 1; /* to make sure it frees */
102 free_altword_token(rec, awtoken);
103 }
104 else
105 awtokenp = &awtoken->next_token;
106 }
107
108 /* now see about the CAR of the list, ie the head */
109 if ((bigcostdata)(batch->costbasis) + batch->costdelta < costlimit)
110 {
111 /* head of list survives pruning => no problem */
112 }
113 else if (batch->next_token != AWTNULL)
114 {
115 /* head of list does not survive => since we can't change the pointer
116 we copy a survivor into it and free that original */
117 awtoken = batch->next_token;
118 batch->costdelta = awtoken->costdelta;
119 batch->word = awtoken->word;
120 batch->word_backtrace = awtoken->word_backtrace;
121 /*ASSERT( batch->refcount == awtoken->refcount); */
122 /* batch->refcount = awtoken->refcount; */
123 batch->next_token = awtoken->next_token;
124 awtoken->refcount = 1; /* to make sure it frees */
125 free_altword_token(rec, awtoken);
126 }
127 else
128 {
129 /* head of list does not survive, nothing survives, so invalidate it */
130 batch->costbasis = MAXcostdata / 2; /* was costdelta */
131 free_altword_token(rec, batch);
132 batch = AWTNULL;
133 }
134 return batch;
135 }
136
reprune_altword_tokens(srec * rec)137 static void reprune_altword_tokens(srec* rec)
138 {
139 stokenID i, j;
140 fsmarc_token* stoken;
141 fsmnode_token* ftoken;
142 bigcostdata current_prune_delta = rec->prune_delta;
143 altword_token* awtoken;
144
145 /* first clear the costbases */
146 for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index)
147 {
148 stoken = &rec->fsmarc_token_array[i];
149 for (j = 0; j < stoken->num_hmm_states; j++)
150 if ((awtoken = stoken->aword_backtrace[j]) != AWTNULL)
151 awtoken->costbasis = MAXcostdata;
152 }
153 for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index)
154 {
155 ftoken = &rec->fsmnode_token_array[i];
156 if ((awtoken = ftoken->aword_backtrace) != AWTNULL)
157 awtoken->costbasis = MAXcostdata;
158 }
159 /* costbases for altword attached to stoken */
160 for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index)
161 {
162 stoken = &rec->fsmarc_token_array[i];
163 for (j = 0; j < stoken->num_hmm_states; j++)
164 if ((awtoken = stoken->aword_backtrace[j]) != AWTNULL)
165 if (awtoken->costbasis > stoken->cost[j])
166 awtoken->costbasis = stoken->cost[j];
167 }
168 /* costbases for altword attached to ftokens */
169 for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index)
170 {
171 ftoken = &rec->fsmnode_token_array[i];
172 if ((awtoken = ftoken->aword_backtrace) != AWTNULL)
173 if (awtoken->costbasis > ftoken->cost)
174 awtoken->costbasis = ftoken->cost;
175 }
176
177 /* now reprune */
178 while (rec->altword_token_freelist_len < rec->altword_token_array_size / 4
179 || rec->altword_token_freelist_len < 2*rec->word_priority_q->max_in_q)
180 {
181 SREC_STATS_INC_AWTOKEN_REPRUNES(1);
182 current_prune_delta = (costdata)(PRUNE_TIGHTEN * PRUNE_TIGHTEN * current_prune_delta);
183 for (i = rec->active_fsmarc_tokens; i != MAXstokenID; i = stoken->next_token_index)
184 {
185 stoken = &rec->fsmarc_token_array[i];
186 for (j = 0; j < stoken->num_hmm_states; j++)
187 {
188 if (stoken->aword_backtrace[j] != AWTNULL)
189 {
190 stoken->aword_backtrace[j] =
191 reprune_altword_token_batch(rec, stoken->aword_backtrace[j],
192 current_prune_delta);
193 }
194 }
195 }
196 for (i = rec->active_fsmnode_tokens;i != MAXftokenID;i = ftoken->next_token_index)
197 {
198 ftoken = &rec->fsmnode_token_array[i];
199 if (ftoken->aword_backtrace != AWTNULL)
200 {
201 ftoken->aword_backtrace =
202 reprune_altword_token_batch(rec, ftoken->aword_backtrace,
203 current_prune_delta);
204 }
205 }
206 ASSERT(current_prune_delta > 0);
207 }
208 }
209
210 #define refcopy_altwords(rEc, aWtOkEn) (aWtOkEn?(aWtOkEn->refcount++,aWtOkEn):aWtOkEn)
211
copy_altwords(srec * rec,altword_token * list1,costdata delta)212 static altword_token* copy_altwords(srec* rec, altword_token* list1, costdata delta)
213 {
214 altword_token *q2, dummy, *last_q2 = &dummy;
215 costdata q2_costdelta;
216
217 /* first check for space */
218 #if BUILD & BUILD_DEBUG
219 int num = 0;
220
221 for (num = 0, q2 = list1; q2 != AWTNULL; q2 = q2->next_token)
222 num++;
223 if (num > rec->altword_token_freelist_len)
224 {
225 printf("warning: mid-copy reprune_altword_tokens()\n");
226 ASSERT(0);
227 reprune_altword_tokens(rec);
228 }
229 #endif
230
231 /* now do the copy */
232 for (; list1 != AWTNULL; list1 = list1->next_token)
233 {
234 ASSERT(list1->refcount >= 1);
235
236 q2_costdelta = list1->costdelta + delta;
237 ASSERT(list1->costdelta != MAXcostdata);
238 if (q2_costdelta > rec->prune_delta)
239 continue;
240 q2 = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
241 if (!q2) /* this should never happen */
242 break;
243 last_q2->next_token = q2;
244 q2->costdelta = q2_costdelta;
245 q2->word = list1->word;
246 q2->word_backtrace = list1->word_backtrace;
247 last_q2 = q2;
248 }
249 last_q2->next_token = AWTNULL;
250 return dummy.next_token;
251 }
252
253 #if 1
254 /* sizewise_altwords just makes sure the list of altwords is no longer than
255 the number of possible word ends. Any tokens beyond that length will get
256 ignored later anyways, so we may as well kill them here.
257 This also is related to the anticipated repruning. This code currently
258 makes use of calloc/free and qsort, but can easily be rewritten to just
259 to a linear in-place sort, linear looking for the 10 best score should not
260 take too long. This is on the todo list!
261 */
altword_token_ptr_cmp(const void * a,const void * b)262 int altword_token_ptr_cmp(const void* a, const void* b)
263 {
264 const altword_token** A = (const altword_token**)a, **B = (const altword_token**)b;
265 if ((*A)->costdelta > (*B)->costdelta) return 1;
266 else if ((*A)->costdelta < (*B)->costdelta) return -1;
267 else return 0;
268 }
sizewise_altwords(srec * rec,altword_token * awtoken_head)269 static altword_token* sizewise_altwords(srec* rec, altword_token* awtoken_head)
270 {
271 #define SIZEWISE_THRESH (rec->word_priority_q->max_in_q)
272 #define SIZEWISE_TARGET (rec->word_priority_q->max_in_q*4/5)
273 int i, num;
274 altword_token *awtoken, **list;
275
276 if( SIZEWISE_TARGET == 0) {
277 free_altword_token(rec, awtoken_head);
278 return NULL;
279 }
280 num = count_altword_token(rec, awtoken_head);
281 /* if the linked list is shorter than max_in_q we're fine */
282 if (num <= SIZEWISE_THRESH)
283 return awtoken_head;
284 else
285 {
286 list = (altword_token**)CALLOC(num, sizeof(altword_token*), L("search.srec.altword_tokens"));
287 ASSERT(awtoken_head->refcount == 1);
288 for (i = 0, awtoken = awtoken_head; i < num; i++, awtoken = awtoken->next_token)
289 list[i] = awtoken;
290 qsort(list, num, sizeof(altword_token*), altword_token_ptr_cmp);
291 for (i = 0; i < SIZEWISE_TARGET; i++)
292 list[i]->next_token = list[i+1];
293 if(i>0) list[i-1]->next_token = AWTNULL;
294 for (; i < num; i++)
295 {
296 list[i]->refcount = 1; /* make sure it frees */
297 free_altword_token(rec, list[i]);
298 }
299 awtoken_head = list[0];
300 awtoken_head->refcount = 1;
301 FREE(list);
302 return awtoken_head;
303 }
304 }
305 #else
306 #define sizewise_altwords(ReC,hEad) hEad
307 #endif
308
309 /*--------------------------------------------------------------------------*
310 * *
311 * acoustic scoring utils *
312 * *
313 *--------------------------------------------------------------------------*/
314
315 #define DO_COMPUTE_MODEL 0
316 #define DO_NOT_COMPUTE_MODEL MAXcostdata
317
best_uint16(asr_uint16_t * p,int n)318 static asr_uint16_t best_uint16(asr_uint16_t* p, int n)
319 {
320 asr_uint16_t rv = p[0];
321 for (;--n > 0;p++) if (rv > *p) rv = *p;
322 return rv;
323 }
compute_model_scores(costdata * current_model_scores,const SWIModel * acoustic_models,pattern_info * pattern,frameID current_search_frame)324 static int compute_model_scores(costdata *current_model_scores, const SWIModel *acoustic_models,
325 pattern_info *pattern, frameID current_search_frame)
326 {
327 int i;
328 int num_models_computed = 0;
329
330 for (i = 0; i < acoustic_models->num_hmmstates; i++)
331 {
332 if (current_model_scores[i] == DO_COMPUTE_MODEL)
333 {
334 scodata score = mixture_diagonal_gaussian_swimodel(pattern->prep,
335 &acoustic_models->hmmstates[i], acoustic_models->num_dims);
336 ASSERT(score <= 0 && "model score out of range");
337
338 current_model_scores[i] = (costdata) - score;
339 num_models_computed++;
340 }
341 }
342 return num_models_computed;
343 }
344
345
346
347 /*precompute all needed models to be used by next frame of search*/
348
find_which_models_to_compute(srec * rec,const SWIModel * acoustic_models)349 static int find_which_models_to_compute(srec *rec, const SWIModel *acoustic_models)
350 {
351 int i;
352 modelID model_index;
353 stokenID current_token_index;
354 ftokenID current_ftoken_index;
355 fsmarc_token *current_token;
356 fsmnode_token *current_ftoken;
357 costdata *current_model_scores;
358 /* arcID arc_index; */
359 arcID fsm_arc_index;
360 HMMInfo* hmm_info;
361 FSMnode* fsm_node;
362 FSMarc* fsm_arc;
363 /*use the current_model_scores array both to tell the model computing stuff
364 what models to compute and to get the scores back. This is a bit ugly, but
365 saves having another array to allocate*/
366
367 /* this belongs elsewhere at initialization,
368 eg. where we'll associate search to acoustic models
369 */
370 rec->avg_state_durations = acoustic_models->avg_state_durations;
371
372 current_model_scores = rec->current_model_scores;
373
374 for (model_index = 0; model_index < acoustic_models->num_hmmstates; model_index++)
375 {
376 current_model_scores[model_index] = DO_NOT_COMPUTE_MODEL;
377 }
378
379 current_token_index = rec->active_fsmarc_tokens;
380
381 while (current_token_index != MAXstokenID)
382 {
383 current_token = &(rec->fsmarc_token_array[current_token_index]);
384 /*need to compute all models needed within this HMM*/
385 fsm_arc = &rec->context->FSMarc_list[ current_token->FSMarc_index];
386 hmm_info = &rec->context->hmm_info_for_ilabel[fsm_arc->ilabel];
387
388 /*handle all states that are alive in this hmm*/
389 for (i = 0;i < hmm_info->num_states;i++)
390 {
391 if ((current_token->cost[i] != MAXcostdata) ||
392 ((i > 0) && current_token->cost[i-1] != MAXcostdata))
393 {
394 model_index = hmm_info->state_indices[i];
395 current_model_scores[model_index] = DO_COMPUTE_MODEL;
396 }
397 }
398 current_token_index = current_token->next_token_index;
399 }
400
401 /*for each active FSM node, find models which can come from node*/
402
403 current_ftoken_index = rec->active_fsmnode_tokens;
404
405 while (current_ftoken_index != MAXftokenID)
406 {
407 current_ftoken = &(rec->fsmnode_token_array[current_ftoken_index]);
408 fsm_node = &rec->context->FSMnode_list[current_ftoken->FSMnode_index];
409 fsm_arc = NULL;
410 for (fsm_arc_index = fsm_node->un_ptr.first_next_arc; fsm_arc_index != MAXarcID;
411 fsm_arc_index = fsm_arc->linkl_next_arc) {
412 fsm_arc = rec->context->FSMarc_list+fsm_arc_index;
413
414 if (fsm_arc->ilabel != MAXlabelID)
415 {
416 hmm_info = &rec->context->hmm_info_for_ilabel[fsm_arc->ilabel];
417 if (hmm_info->num_states > 0)
418 {
419
420 /* we should build in here a check that this arc has reasonable weight */
421 /* if(fsm_arc->cost < rec->prune_delta) */
422 current_model_scores[hmm_info->state_indices[0]] = DO_COMPUTE_MODEL;
423 }
424 }
425 }
426 current_ftoken_index = current_ftoken->next_token_index;
427 }
428
429 /*compute the scores in a batch - this allows the model computing code to
430 chunk it up however it wants*/
431 return 0;
432 }
433
434 /*--------------------------------------------------------------------------*
435 * *
436 * pruning utils *
437 * *
438 *--------------------------------------------------------------------------*/
439
440 /*this is called at the end of the search*/
prune_new_tokens(srec * rec,costdata current_prune_thresh)441 static int prune_new_tokens(srec *rec, costdata current_prune_thresh)
442 {
443 int i;
444 nodeID num_deleted;
445 stokenID token_index;
446 fsmarc_token *token;
447 stokenID next_token_index;
448 stokenID *list_head_pointer;
449 char any_alive;
450
451 num_deleted = 0;
452 list_head_pointer = &(rec->active_fsmarc_tokens);
453
454 for (token_index = rec->active_fsmarc_tokens; token_index != MAXstokenID;
455 token_index = next_token_index)
456 {
457
458 token = &(rec->fsmarc_token_array[token_index]);
459 next_token_index = token->next_token_index;
460
461 any_alive = 0;
462
463 for (i = 0;i < token->num_hmm_states;i++)
464 {
465 if (token->cost[i] < current_prune_thresh)
466 {
467 any_alive = 1;
468 }
469 }
470
471 if (!any_alive)
472 { /*everything pruned so recylce the token*/
473 *list_head_pointer = next_token_index;
474
475 rec->best_token_for_arc[rec->fsmarc_token_array[token_index].FSMarc_index] = MAXstokenID;
476
477 free_fsmarc_token(rec, token_index);
478
479 num_deleted++;
480 rec->num_new_states--;
481 }
482 else
483 {
484 list_head_pointer = &token->next_token_index;
485 }
486 }
487 return num_deleted;
488 }
489
reprune_word_tokens_if_necessary(srec * rec)490 static void reprune_word_tokens_if_necessary(srec *rec)
491 {
492 word_token* wtoken;
493 wtokenID wtoken_index = rec->word_token_freelist;
494 wtokenID num_free_wtokens = 0;
495
496 for (; wtoken_index != MAXwtokenID; wtoken_index = wtoken->next_token_index)
497 {
498 wtoken = &rec->word_token_array[ wtoken_index];
499 num_free_wtokens++;
500 }
501 if (num_free_wtokens < 2*rec->word_priority_q->max_in_q)
502 reprune_word_tokens(rec, 0);
503 }
504
505 /*this is called when we run out of room in the state token arrays and need to make more room -
506 it only prunes in theh new time slice - we could also look at pruning the previous slice if needed*/
507
508
reprune_new_states(srec * rec,costdata current_best_cost,costdata current_prune_delta)509 static costdata reprune_new_states(srec *rec, costdata current_best_cost, costdata current_prune_delta)
510 {
511 int num_deleted;
512 /*first check to see if current pruning thresholds make enough room
513 (because of better max)*/
514
515 prune_new_tokens(rec, current_best_cost + current_prune_delta);
516
517 ASSERT(((float)current_best_cost) + current_prune_delta < (float)SHRT_MAX);
518
519 while ((rec->num_new_states >= rec->max_new_states - 1)
520 || rec->fsmarc_token_freelist == MAXstokenID)
521 {
522
523 SREC_STATS_INC_STOKEN_REPRUNES(1);
524
525 current_prune_delta = (costdata)(PRUNE_TIGHTEN * current_prune_delta);
526
527 if (current_prune_delta <= 1)
528 {
529 /*this seems like an unlikely case, but if we can't prune enough to make room, give up
530 on the utterance (better than being stuck here forever!)*/
531
532 /*FIX replace with crec abort mechanism*/
533 PLogError ("reprune_new_states: can't seem to prune enough - best cost %d num_new_states %d\n",
534 current_best_cost, rec->num_new_states);
535
536 print_fsmarc_token_list(rec, rec->active_fsmarc_tokens, "CANNOT PRUNE");
537
538 dump_core("reprune died\n");
539 }
540
541 num_deleted = prune_new_tokens(rec, current_best_cost + current_prune_delta);
542
543 ASSERT(((float)current_best_cost + current_prune_delta) < (float)USHRT_MAX);
544 }
545 return current_prune_delta;
546 }
547
548
549
prune_fsmnode_tokens(srec * rec,costdata current_prune_thresh,ftokenID not_this_one)550 static void prune_fsmnode_tokens(srec *rec, costdata current_prune_thresh, ftokenID not_this_one)
551 {
552 nodeID num_deleted;
553 ftokenID token_index;
554 fsmnode_token *token;
555 ftokenID next_token_index;
556 ftokenID *list_head_pointer;
557
558 num_deleted = 0;
559 token_index = rec->active_fsmnode_tokens;
560
561 token = &(rec->fsmnode_token_array[token_index]);
562 list_head_pointer = &(rec->active_fsmnode_tokens);
563
564 while (token_index != MAXftokenID)
565 {
566 next_token_index = token->next_token_index;
567 if( token_index!=not_this_one && token->cost >= current_prune_thresh)
568 {
569 /*pruned so recycle the token*/
570 *list_head_pointer = next_token_index;
571 rec->best_token_for_node[token->FSMnode_index] = MAXftokenID;
572 free_fsmnode_token(rec, token_index);
573 num_deleted++;
574 }
575 else
576 {
577 list_head_pointer = &token->next_token_index;
578 }
579 token_index = next_token_index;
580 token = &(rec->fsmnode_token_array[token_index]);
581 }
582 }
583
584
585 /*this is called when we run out of room in the state token arrays and need to make more room -
586 it only prunes in theh new time slice - we could also look at pruning the previous slice if needed*/
587
588
reprune_fsmnode_tokens(srec * rec,costdata current_best_cost,costdata current_prune_delta,ftokenID not_this_one)589 static costdata reprune_fsmnode_tokens(srec *rec, costdata current_best_cost, costdata current_prune_delta,
590 ftokenID not_this_one)
591 {
592
593 /*first check to see if current pruning thresholds make enough
594 room (because of better max)*/
595
596 prune_fsmnode_tokens(rec, current_best_cost+current_prune_delta, not_this_one);
597
598 ASSERT((float)current_best_cost + (float)current_prune_delta < (float)SHRT_MAX);
599
600 while (rec->fsmnode_token_freelist == MAXftokenID)
601 {
602 SREC_STATS_INC_FTOKEN_REPRUNES(1);
603
604 current_prune_delta = (costdata)(PRUNE_TIGHTEN * current_prune_delta);
605
606 if (current_prune_delta <= 1)
607 {
608 /*this seems like an unlikely case, but if we can't prune enough to make room, give up
609 on the utterance (better than being stuck here forever!)*/
610
611 /*FIX replace with crec abort mechanism*/
612 PLogError ("reprune_fsmnode_tokens: can't seem to prune enough - best cost %d\n",
613 current_best_cost);
614
615 print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "CANNOT PRUNE: ");
616
617 dump_core("reprune_fsmnode_tokens() died\n");
618 }
619
620 prune_fsmnode_tokens(rec, current_best_cost+current_prune_delta, not_this_one);
621 ASSERT((float)current_best_cost + (float)current_prune_delta < (float)USHRT_MAX);
622 }
623
624 return current_prune_delta;
625 }
626
627
reset_best_cost_to_zero(srec * rec,costdata current_best_cost)628 void reset_best_cost_to_zero(srec* rec, costdata current_best_cost)
629 {
630 fsmarc_token* stoken;
631 stokenID token_index;
632 short i, num_states;
633
634
635 /*do the state tokens*/
636 for (token_index = rec->active_fsmarc_tokens;
637 token_index != MAXstokenID;
638 token_index = stoken->next_token_index)
639 {
640
641 stoken = &rec->fsmarc_token_array[ token_index];
642 num_states = stoken->num_hmm_states;
643 for (i = 0; i < num_states; i++)
644 {
645 if (stoken->cost[i] < MAXcostdata)
646 {
647 ASSERT(stoken->cost[i] >= current_best_cost);
648 stoken->cost[i] = (costdata)(stoken->cost[i] - (costdata) current_best_cost);
649 }
650 }
651 }
652 }
653
reset_cost_offsets(multi_srec * rec,frameID current_frame,costdata current_best_cost)654 static void reset_cost_offsets(multi_srec* rec, frameID current_frame,
655 costdata current_best_cost)
656 {
657 rec->cost_offset_for_frame[current_frame] = current_best_cost;
658 if (current_frame == 0)
659 rec->accumulated_cost_offset[current_frame] = current_best_cost;
660 else
661 rec->accumulated_cost_offset[current_frame] = rec->accumulated_cost_offset[current_frame-1] + current_best_cost;
662 }
663
664
665 /*--------------------------------------------------------------------------*
666 * *
667 * word_token functions *
668 * *
669 *--------------------------------------------------------------------------*/
670
671 /*this function allocates a new word token and remembers it in a list in the srec structure (to be used for backtrace later on*/
672
create_word_token(srec * rec)673 static wtokenID create_word_token(srec *rec)
674 {
675 wtokenID word_token_index;
676 word_token *word_token;
677
678 word_token_index = get_free_word_token(rec, NULL_IF_NO_TOKENS);
679
680 if (0 && word_token_index == MAXwtokenID)
681 {
682 /*FIX - use crec error handling*/
683 dump_core("create_word_token: cannot allocate word token - we need"
684 " to figure out a word pruning strategy when this happens!\n");
685 }
686
687 word_token = &(rec->word_token_array[word_token_index]);
688
689 return word_token_index;
690 }
691
692 /* gets rid of fsmnode which trace back to this word since
693 the word is not goingto make it ono the word lattice */
694
block_fsmnodes_per_backtrace(srec * rec,wtokenID wtoken_id)695 static int block_fsmnodes_per_backtrace(srec *rec, wtokenID wtoken_id)
696 {
697 int num_ftokens_blocked = 0;
698 ftokenID current_token_index = rec->active_fsmnode_tokens;
699 while (current_token_index != MAXftokenID) {
700 fsmnode_token *token = &(rec->fsmnode_token_array[current_token_index]);
701 if (token->word_backtrace == wtoken_id) {
702 num_ftokens_blocked++;
703 token->cost = MAXcostdata;
704 }
705 current_token_index = token->next_token_index;
706 }
707 return num_ftokens_blocked;
708 }
709
710 /* processing a word boundary,
711 current_token is the fsmnode_token to the left of the boundary
712 cost is the cost through this frame
713 pcurrent_word_threshold is the worst score for words on the prio.q
714 returns the word_token index just created
715 */
srec_process_word_boundary_nbest(srec * rec,nodeID end_node,wordID word,wtokenID word_backtrace,costdata cost,costdata * pcurrent_word_threshold,int * any_nodes_blocked)716 static wtokenID srec_process_word_boundary_nbest(srec* rec,
717 nodeID end_node,
718 wordID word,
719 wtokenID word_backtrace,
720 costdata cost,
721 costdata* pcurrent_word_threshold,
722 int *any_nodes_blocked)
723 {
724 wtokenID wtoken_index;
725 wtokenID return_wtoken_index;
726 wtokenID token_id_to_remove;
727
728 word_token* wtoken;
729
730 if (word_backtrace != MAXwtokenID)
731 {
732 word_token* btoken = &rec->word_token_array[word_backtrace];
733 if (btoken->end_time >= rec->current_search_frame)
734 {
735 SREC_STATS_INC_BAD_BACKTRACES();
736 return MAXwtokenID;
737 }
738 }
739 /*make new word token*/
740 wtoken_index = create_word_token(rec);
741 //ASSERT(wtoken_index != MAXwtokenID);
742 if (wtoken_index == MAXwtokenID)
743 {
744 /* we could have called reprune_word_tokens() here, but we instead called it
745 at the beginning of do_epsilon_updates() to avoid complications of
746 re-pruning word tokens too deep in the search update */
747 return_wtoken_index = MAXwtokenID;
748 return return_wtoken_index;
749 }
750
751 wtoken = &(rec->word_token_array[wtoken_index]);
752 wtoken->word = word;
753 wtoken->_word_end_time = 0; // new
754 wtoken->end_time = rec->current_search_frame;
755 wtoken->end_node = end_node;
756 wtoken->backtrace = word_backtrace;
757 wtoken->cost = cost;
758
759 token_id_to_remove = add_word_token_to_priority_q(rec->word_priority_q, wtoken_index, rec->word_token_array);
760
761 if (token_id_to_remove == wtoken_index)
762 return_wtoken_index = MAXwtokenID;
763 else
764 {
765 /* the word was truly added to the priority q, so we must
766 get the new worst word on that list */
767 *pcurrent_word_threshold = get_priority_q_threshold(rec->word_priority_q, rec->word_token_array);
768 return_wtoken_index = wtoken_index;
769 }
770
771 if (token_id_to_remove != MAXwtokenID)
772 {
773 /* Jean: must allow for this word_token to be recycled */
774
775 /* ok, the word won't we maintained, so there's no point to
776 continue to search this path (as headed by this fsmarc_token) */
777
778 *any_nodes_blocked += block_fsmnodes_per_backtrace( rec, token_id_to_remove);
779
780 /* we killed the fsmnode token associated with the word being removed.
781 But, we didn't kill it's word backtrace, so there may be word tokens
782 in the backtrace which cannot connect. But we can't really kill
783 the whole backtrace since it might be shared with other
784 active states, Mike's idea is to add a counter on the
785 word_tokens, which counts how many active paths are using
786 this word_token ... TODO */
787
788 /* print_word_token(rec, token_id_to_remove, "srec_process_word_boundary killed wtoken "); */
789 free_word_token(rec, token_id_to_remove);
790
791 }
792 return return_wtoken_index;
793 }
794
srec_get_parent_for_active_fsmnode(srec * rec,ftokenID ftoken_index)795 ftokenID* srec_get_parent_for_active_fsmnode(srec* rec, ftokenID ftoken_index)
796 {
797 ftokenID* parent_ftoken_index = &rec->active_fsmnode_tokens;
798 while( (*parent_ftoken_index) != MAXftokenID) {
799 fsmnode_token* parent_ftoken = &rec->fsmnode_token_array[ *parent_ftoken_index];
800 if( *parent_ftoken_index == ftoken_index)
801 return parent_ftoken_index;
802 parent_ftoken_index = &parent_ftoken->next_token_index;
803 }
804 return NULL;
805 }
806
807 /*---------------------------------------------------------------------------*
808 * *
809 * updates *
810 * *
811 *---------------------------------------------------------------------------*/
812
813
814 /*handles epsilon transitions (used for word boundaries). Epsilons come from active
815 fsmnode tokens and maximize into new FSMnode tokens (finds the best path to an FSM
816 node).
817
818 When we hit an
819 epsilon, create a word token, put it in the path, and remember it in a
820 list of all word tokens*/
821
do_epsilon_updates(srec * rec,costdata prune_delta,costdata best_cost)822 static int do_epsilon_updates(srec *rec, costdata prune_delta,
823 costdata best_cost)
824 {
825 fsmnode_token *new_ftoken;
826 fsmnode_token *current_ftoken;
827 wtokenID wtoken_index;
828 FSMnode* fsm_node;
829 FSMarc* fsm_arc;
830 costdata cost, cost_with_wtw;
831 ftokenID new_ftoken_index;
832 ftokenID current_ftoken_index;
833 costdata current_word_threshold;
834 arcID fsm_arc_index;
835 wordID word_with_wtw;
836 int num_fsm_nodes_updated=0, num_fsm_nodes_blocked, num_fsm_nodes_blocked2;
837 int num_wtokens_maybe_homonyms;
838 costdata current_prune_delta;
839 costdata current_prune_thresh;
840 altword_token* awtoken;
841
842
843 // printf("FRAME %d\n", rec->current_search_frame);
844 // print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "BEFORE UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
845
846 current_word_threshold = MAXcostdata;
847 current_prune_delta = prune_delta;
848 current_prune_thresh = best_cost + current_prune_delta;
849
850 current_ftoken_index = rec->active_fsmnode_tokens;
851 while (current_ftoken_index != MAXftokenID)
852 {
853 // print_fsmnode_token(rec, current_ftoken_index, "processing ... ");
854 current_ftoken = &(rec->fsmnode_token_array[current_ftoken_index]);
855
856 cost = current_ftoken->cost; /*get last state of token*/
857 fsm_node = &rec->context->FSMnode_list[current_ftoken->FSMnode_index];
858 fsm_arc = NULL;
859
860 /* Jean: see below too, let's remember the wtoken_index we create in
861 case we need to re-use it. All N epsilon updates, and all M
862 M outgoing arcs can share, cuz this is the same arriving arc@frame */
863
864 wtoken_index = MAXwtokenID;
865
866 if( cost >= current_prune_thresh) {
867 ftokenID* parent_ftoken_index;
868 // the srec_get_parent_for_active_fsmnode() functions can be
869 // gotten rid of if we use a doubly-linked list (fwd/bwd ptrs)
870 parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, current_ftoken_index);
871 if(!parent_ftoken_index) {
872 PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, current_ftoken_index);
873 print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
874 exit(1);
875 }
876 *parent_ftoken_index = current_ftoken->next_token_index;
877 // effectively release this fsmnode_token and go to next one
878 rec->best_token_for_node[ current_ftoken->FSMnode_index] = MAXftokenID;
879 free_fsmnode_token( rec, current_ftoken_index);
880 current_ftoken_index = *parent_ftoken_index;
881 continue;
882 }
883
884 num_fsm_nodes_updated++;
885 num_fsm_nodes_blocked = 0;
886 num_wtokens_maybe_homonyms = 0;
887 for (fsm_arc_index = fsm_node->un_ptr.first_next_arc;
888 fsm_arc_index != MAXarcID;
889 fsm_arc_index = fsm_arc->linkl_next_arc)
890 {
891 fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index];
892
893 /* consider only epsilon transitions */
894 if (fsm_arc->ilabel >= EPSILON_OFFSET)
895 continue;
896
897 /* can't loop to yourself on epsilon! */
898 ASSERT(fsm_arc->to_node != current_ftoken->FSMnode_index);
899
900 cost_with_wtw = current_ftoken->cost + fsm_arc->cost; /* WTW */
901 word_with_wtw = current_ftoken->word;
902 if(fsm_arc->olabel != WORD_EPSILON_LABEL)
903 word_with_wtw = fsm_arc->olabel ;
904
905 // we should compare cost_with_wtw but let's let the priority_q
906 // do the pruning
907 if (cost>=current_prune_thresh || fsm_arc->cost>=current_prune_thresh)
908 continue;
909
910 /*if word boundary, see if it crosses the word end threshold*/
911 /* no room on the word priority_q, so not worth pursuing */
912 if (fsm_arc->ilabel == WORD_BOUNDARY && cost_with_wtw >= current_word_threshold) {
913 continue; // goto NEXT_FTOKEN;
914 }
915
916 new_ftoken = NULL;
917 new_ftoken_index = rec->best_token_for_node[ fsm_arc->to_node];
918 if(new_ftoken_index != MAXftokenID)
919 new_ftoken = &rec->fsmnode_token_array[ new_ftoken_index];
920 if( new_ftoken && (current_ftoken->cost+fsm_arc->cost)<new_ftoken->cost) {
921 /* clobber it */
922 } else if(new_ftoken) {
923 /* merge it */
924 } else if(rec->fsmnode_token_freelist == MAXftokenID) {
925 /* create it? maybe */
926 current_prune_delta = reprune_fsmnode_tokens(rec, best_cost, current_prune_delta, current_ftoken_index);
927 current_prune_thresh = best_cost + current_prune_delta;
928 }
929
930 if (fsm_arc->ilabel == WORD_BOUNDARY) {
931 /* 20030920, for sure the backtrace will change! */
932 // token->word_backtrace = MAXwtokenID;
933
934 wtoken_index = srec_process_word_boundary_nbest(rec,
935 current_ftoken->FSMnode_index,
936 word_with_wtw,
937 current_ftoken->word_backtrace,
938 cost_with_wtw,
939 ¤t_word_threshold,
940 &num_fsm_nodes_blocked);
941 if (wtoken_index != MAXwtokenID) {
942 WORD_TOKEN_SET_WD_ETIME( (rec->word_token_array+wtoken_index),
943 rec->word_token_array[wtoken_index].end_time - current_ftoken->silence_duration);
944 }
945 if( fsm_arc->olabel!=WORD_EPSILON_LABEL && wtoken_index != MAXwtokenID) {
946 num_wtokens_maybe_homonyms++;
947 if( num_wtokens_maybe_homonyms>1)
948 WORD_TOKEN_SET_HOMONYM( (rec->word_token_array+wtoken_index), 1);
949 }
950 /* now drop alternative words, note the use of current_token
951 because token is on the other side already */
952 if (current_ftoken->aword_backtrace != AWTNULL) {
953 awtoken = current_ftoken->aword_backtrace;
954 for (; awtoken != AWTNULL; awtoken = awtoken->next_token) {
955 wtokenID wti;
956 wti = srec_process_word_boundary_nbest(rec,
957 current_ftoken->FSMnode_index,
958 awtoken->word,
959 awtoken->word_backtrace,
960 cost_with_wtw + awtoken->costdelta,
961 ¤t_word_threshold,
962 &num_fsm_nodes_blocked2);
963 }
964 /* if we don't free the altwords here, i had thought that
965 updates from stateN would make the altwords grow and grow,
966 but by that time all the fsmnodes are brand new */
967 /* leaving them alive allows them to propagate to state0 thru
968 other epsilons (eg non .wb) to new nodes but we don't
969 use such arcs.
970 the .wb would not get dropped again 'cuz we check
971 for that in wtoken_index above.
972 this is quite complex and the case for dropping word_tokens
973 from the node AFTER the .wb can be made
974 ie. would need a re-write of do_epsilon_updates() */
975 if( current_ftoken->aword_backtrace != AWTNULL)
976 free_altword_token_batch(rec, current_ftoken->aword_backtrace);
977 current_ftoken->aword_backtrace = AWTNULL;
978 /*print_fsmnode_token(rec, token-rec->fsmnode_token_array, "123a");*/
979 }
980
981 if( wtoken_index != MAXwtokenID) {
982
983 if( new_ftoken == NULL) {
984 /* create token for the other side */
985 new_ftoken_index = get_free_fsmnode_token(rec, NULL_IF_NO_TOKENS);
986 // this should not happen because of the above check near
987 // fsmnode_token_freelist == MAXftokenID
988 ASSERT(new_ftoken_index != MAXftokenID);
989 new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
990 new_ftoken->word_backtrace = wtoken_index;
991 new_ftoken->cost = cost_with_wtw;
992 new_ftoken->word = WORD_EPSILON_LABEL;
993 new_ftoken->FSMnode_index = fsm_arc->to_node;
994 new_ftoken->aword_backtrace = AWTNULL;
995 new_ftoken->next_token_index = current_ftoken->next_token_index;
996 current_ftoken->next_token_index = new_ftoken_index;
997 rec->best_token_for_node[fsm_arc->to_node] = new_ftoken_index;
998 } else if(new_ftoken && cost_with_wtw<new_ftoken->cost) {
999 /* update token on the other side */
1000 ftokenID *parent_ftoken_index;
1001 new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
1002 new_ftoken->cost = cost_with_wtw;
1003 new_ftoken->word_backtrace = wtoken_index;
1004 new_ftoken->word = WORD_EPSILON_LABEL;
1005 // unchanged token->FSMnode_index = fsm_arc->to_node;
1006 // because this token was updated, we need to reprocess it, right after
1007 parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, new_ftoken_index);
1008 if(!parent_ftoken_index) {
1009 PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, new_ftoken_index);
1010 print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
1011 exit(1);
1012 }
1013 *parent_ftoken_index = new_ftoken->next_token_index;
1014 new_ftoken->next_token_index = current_ftoken->next_token_index;
1015 current_ftoken->next_token_index = new_ftoken_index;
1016 rec->best_token_for_node[ fsm_arc->to_node] = new_ftoken_index;
1017 /* new_ftoken->aword_backtrace must be null, alts here were
1018 processed and dropped in srec_process_word_boundary_nbest() */
1019 if(new_ftoken->aword_backtrace != AWTNULL) {
1020 PLogError( ("Error: internal search error near %s %d\n"), __FILE__, __LINE__);
1021 continue;
1022 }
1023 } else {
1024 /* token on other side is same or better, just leave it */
1025 }
1026 }
1027 }
1028 else if(fsm_arc->ilabel == EPSILON_LABEL) {
1029 if( new_ftoken == NULL) {
1030 /* create token for the other side */
1031 new_ftoken_index = get_free_fsmnode_token(rec, NULL_IF_NO_TOKENS);
1032 // this should not happen because of the above check near
1033 // fsmnode_token_freelist == MAXftokenID
1034 ASSERT(new_ftoken_index != MAXftokenID);
1035 new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
1036 new_ftoken->word_backtrace = current_ftoken->word_backtrace;
1037 new_ftoken->cost = cost_with_wtw;
1038 new_ftoken->word = word_with_wtw;
1039 new_ftoken->FSMnode_index = fsm_arc->to_node;
1040 new_ftoken->aword_backtrace = refcopy_altwords(rec, current_ftoken->aword_backtrace);
1041 new_ftoken->next_token_index = current_ftoken->next_token_index;
1042 current_ftoken->next_token_index = new_ftoken_index;
1043 rec->best_token_for_node[fsm_arc->to_node] = new_ftoken_index;
1044 } else if(new_ftoken && cost_with_wtw<new_ftoken->cost) {
1045 /* update token on the other side */
1046 ftokenID *parent_ftoken_index;
1047 new_ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
1048 new_ftoken->cost = cost_with_wtw;
1049 new_ftoken->word_backtrace = current_ftoken->word_backtrace;
1050 new_ftoken->word = word_with_wtw;
1051 /* here we are giving up the path and alternatives that existed at
1052 this node, which is not great! The new (better) top choice
1053 coming in and it's alternatives are propagated instead.
1054 TODO: merge the alternative lists and the previous top choice
1055 */
1056 if(new_ftoken->aword_backtrace!=AWTNULL)
1057 free_altword_token_batch( rec, new_ftoken->aword_backtrace);
1058 new_ftoken->aword_backtrace = AWTNULL;
1059 new_ftoken->aword_backtrace = refcopy_altwords(rec, current_ftoken->aword_backtrace);
1060 // unchanged token->FSMnode_index = fsm_arc->to_node;
1061 // because this token was updated, we need to re-process it, right after
1062 parent_ftoken_index = srec_get_parent_for_active_fsmnode( rec, new_ftoken_index);
1063 if(!parent_ftoken_index) {
1064 PLogError( ("Error: internal search error near %s %d, finding %d\n"), __FILE__, __LINE__, new_ftoken_index);
1065 print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "in DO_UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
1066 exit(1);
1067 }
1068 *parent_ftoken_index = new_ftoken->next_token_index;
1069 new_ftoken->next_token_index = current_ftoken->next_token_index;
1070 current_ftoken->next_token_index = new_ftoken_index;
1071 rec->best_token_for_node[ fsm_arc->to_node] = new_ftoken_index;
1072 } else {
1073 /* token on other side is same or better, just leave it */
1074 /* todo: maybe merge alternative lists? */
1075 }
1076 }
1077 } /* loop over arcs */
1078
1079 ASSERT( current_ftoken->cost != MAXcostdata);
1080 if( num_fsm_nodes_blocked) {
1081 /* we do not want to propagate fsm node tokens for paths that have
1082 just been killed on the basis of no space for word propagation */
1083 prune_fsmnode_tokens(rec, MAXcostdata/2, current_ftoken_index);
1084 }
1085
1086 // NEXT_FTOKEN:
1087 current_ftoken_index = current_ftoken->next_token_index;
1088 }
1089
1090 // print_fsmnode_token_list(rec, rec->active_fsmnode_tokens, "AFTER UPDATE_EPSILONS ACTIVE_FSMNODE_TOKENS: \n");
1091
1092 sanity_check_altwords(rec, rec->altword_token_freelist);
1093 return num_fsm_nodes_updated;
1094 }
1095
update_internal_hmm_states(srec * rec,costdata * pcurrent_prune_delta,costdata * pcurrent_best_cost,costdata * precomputed_model_scores)1096 static void update_internal_hmm_states(srec *rec, costdata *pcurrent_prune_delta,
1097 costdata *pcurrent_best_cost,
1098 costdata *precomputed_model_scores)
1099 {
1100 stokenID current_token_index;
1101 fsmarc_token *current_token;
1102 costdata current_best_cost;
1103 costdata current_prune_thresh;
1104 costdata current_prune_delta;
1105 costdata model_cost;
1106 asr_int16_t any_alive;
1107 HMMInfo *hmm_info;
1108 modelID model_index;
1109 asr_int16_t internal_state, end_state;
1110 arcID fsm_arc_index;
1111 FSMarc* fsm_arc;
1112
1113 costdata prev_cost;
1114 costdata self_loop_cost;
1115
1116 current_best_cost = *pcurrent_best_cost;
1117 current_prune_delta = *pcurrent_prune_delta;
1118 current_prune_thresh = current_best_cost + current_prune_delta;
1119
1120 ASSERT(((float)current_best_cost + current_prune_delta) < (float)USHRT_MAX);
1121
1122 /* best_token_for_arc must be reset here, cuz the same array might have
1123 been used by another gender. Alternatively we could have let each
1124 recog use it's own array thereby save cpu at expense of memory */
1125 for (fsm_arc_index = 0; fsm_arc_index < rec->context->num_arcs; fsm_arc_index++)
1126 rec->best_token_for_arc[fsm_arc_index] = MAXstokenID;
1127
1128 current_token_index = rec->active_fsmarc_tokens;
1129 while (current_token_index != MAXstokenID)
1130 {
1131 current_token = &(rec->fsmarc_token_array[current_token_index]);
1132
1133 fsm_arc_index = current_token->FSMarc_index;
1134 fsm_arc = &rec->context->FSMarc_list[fsm_arc_index];
1135
1136 /* best_token_for_arc must be set here, cuz it was reset above */
1137 rec->best_token_for_arc[fsm_arc_index] = current_token_index;
1138
1139 hmm_info = &rec->context->hmm_info_for_ilabel[ fsm_arc->ilabel];
1140 any_alive = 0;
1141 end_state = current_token->num_hmm_states - 1;
1142
1143 for (internal_state = end_state; internal_state >= 0; internal_state--)
1144 {
1145
1146 model_index = hmm_info->state_indices[internal_state];
1147 model_cost = precomputed_model_scores[model_index];
1148
1149 /*better to come from previous or self?*/
1150
1151 if (internal_state > 0)
1152 {
1153 prev_cost = current_token->cost[internal_state-1];
1154 /* a duration model can be applied here,
1155 by changing the prev_cost according to some function of
1156 the current_token->duration[internal_state-1] rec->avg_state_durations[ prev_model_index] */
1157 if (prev_cost < current_prune_thresh)
1158 {
1159 modelID prev_model_index;
1160 prev_cost = (costdata)(prev_cost + (costdata) model_cost);
1161 /* apply duration model for "next" transition, note that it's nice
1162 to access the duration model (avg_state_durations) somehwere
1163 other than the acoustic models which could be far away in memory
1164 arrive penalty would be applied here too if it was reqd */
1165
1166 prev_model_index = hmm_info->state_indices[internal_state-1];
1167 prev_cost = (costdata)(prev_cost + (costdata) duration_penalty_depart(rec->avg_state_durations[ prev_model_index],
1168 current_token->duration[internal_state-1]));
1169
1170 }
1171 }
1172 else
1173 {
1174 prev_cost = MAXcostdata;
1175 }
1176
1177 self_loop_cost = current_token->cost[internal_state];
1178 /* a duration model can be applied here,
1179 by changing the self_loop_cost according to some function of
1180 the current_token->duration[internal_state] rec->avg_state_durations[ prev_model_index] */
1181 if (self_loop_cost < current_prune_thresh)
1182 {
1183 self_loop_cost = (costdata)(self_loop_cost + (costdata) model_cost);
1184 /* apply duration model for "loop" transition */
1185
1186 self_loop_cost = (costdata)(self_loop_cost + (costdata) duration_penalty_loop(rec->avg_state_durations[ model_index],
1187 current_token->duration[internal_state]));
1188
1189 }
1190
1191 if (prev_cost < self_loop_cost)
1192 {
1193 current_token->cost[internal_state] = prev_cost;
1194 current_token->word_backtrace[internal_state] = current_token->word_backtrace[internal_state-1];
1195 current_token->word[internal_state] = current_token->word[internal_state-1];
1196 current_token->duration[internal_state] = 1;
1197 if (current_token->word[internal_state-1] != MAXwordID)
1198 {
1199 if (current_token->aword_backtrace[internal_state] != AWTNULL)
1200 free_altword_token_batch(rec,
1201 current_token->aword_backtrace[internal_state]);
1202 current_token->aword_backtrace[internal_state] = refcopy_altwords(rec, current_token->aword_backtrace[internal_state-1]);
1203 /*print_fsmarc_token(rec, current_token_index, "123c");*/
1204 }
1205 else
1206 {
1207 /* if there's no top choice, there shouldn't be alternatives! */
1208 ASSERT(current_token->aword_backtrace[internal_state] == AWTNULL);
1209 ASSERT(current_token->aword_backtrace[internal_state-1] == AWTNULL);
1210 }
1211 }
1212 else
1213 {
1214 current_token->cost[internal_state] = self_loop_cost;
1215 current_token->duration[internal_state]++;
1216 }
1217
1218 if (current_token->cost[internal_state] < current_prune_thresh)
1219 {
1220 any_alive = 1;
1221 if (current_token->cost[internal_state] < current_best_cost)
1222 {
1223 current_best_cost = current_token->cost[internal_state];
1224 current_prune_thresh = current_best_cost + current_prune_delta;
1225 }
1226 }
1227 }
1228 current_token_index = current_token->next_token_index;
1229 }
1230 *pcurrent_best_cost = current_best_cost;
1231 *pcurrent_prune_delta = current_prune_delta;
1232 }
1233
1234
1235
1236
GetNumArcsArrivingClip2(srec_context * context,FSMnode * fsm_node)1237 static int GetNumArcsArrivingClip2(srec_context* context, FSMnode* fsm_node)
1238 {
1239 arcID fpa = fsm_node->first_prev_arc;
1240 FSMarc* arc;
1241
1242 if (fpa == MAXarcID)
1243 return 0;
1244 arc = &context->FSMarc_list[fpa];
1245 if (arc->linkl_prev_arc == MAXarcID)
1246 return 1;
1247 else
1248 return 2;
1249 }
1250
update_from_hmms_to_fsmnodes(srec * rec,costdata prune_delta,costdata best_cost)1251 static int update_from_hmms_to_fsmnodes(srec *rec, costdata prune_delta, costdata best_cost)
1252 {
1253 stokenID current_token_index;
1254 fsmarc_token *current_token;
1255 int end_state;
1256 costdata end_cost;
1257 costdata current_prune_thresh;
1258 costdata current_prune_delta; /*may get tighter to keep num fsmnodes under control*/
1259 // vFSMarc vfsm_arc;
1260 FSMarc* fsm_arc;
1261 FSMnode* fsm_node;
1262 // vFSMnode vfsm_node;
1263 arcID fsm_arc_index;
1264 nodeID to_node_index;
1265 ftokenID new_ftoken_index;
1266 fsmnode_token *ftoken;
1267 modelID end_model_index;
1268 labelID ilabel;
1269 short end_cost_equality_hack;
1270 HMMInfo* hmm_info;
1271 altword_token *awtoken, *q;
1272 int num_fsmnode_updates = 0;
1273
1274 current_prune_delta = prune_delta;
1275 current_prune_thresh = best_cost + current_prune_delta;
1276 current_token_index = rec->active_fsmarc_tokens;
1277
1278 for (ilabel = 0; ilabel < NODE_INFO_NUMS; ilabel++)
1279 {
1280 rec->current_best_ftoken_cost[ilabel] = MAXcostdata / 2;
1281 rec->current_best_ftoken_index[ilabel] = MAXftokenID;
1282 }
1283 sanity_check_altwords(rec, rec->altword_token_freelist);
1284
1285 while (current_token_index != MAXstokenID)
1286 {
1287 current_token = &(rec->fsmarc_token_array[current_token_index]);
1288
1289 /*propagate from end of state token to new FSM node*/
1290 end_state = (char) current_token->num_hmm_states - 1;
1291
1292 ASSERT((current_token->aword_backtrace[end_state] == AWTNULL)
1293 || (current_token->word[end_state] != MAXwordID));
1294 end_cost = current_token->cost[end_state];
1295
1296 /* anticipated repruning: make sure there is enough space before
1297 beginning complex computation */
1298 if (rec->word_priority_q->max_in_q>1 && rec->altword_token_freelist_len < 3*rec->word_priority_q->max_in_q)
1299 reprune_altword_tokens(rec);
1300
1301 if (end_cost < current_prune_thresh)
1302 {
1303 num_fsmnode_updates++;
1304 fsm_arc_index = current_token->FSMarc_index;
1305 fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index];
1306
1307 hmm_info = &rec->context->hmm_info_for_ilabel[ fsm_arc->ilabel];
1308
1309 end_model_index = hmm_info->state_indices[end_state];
1310
1311 end_cost = (costdata)(end_cost + (costdata) duration_penalty_depart(rec->avg_state_durations[end_model_index],
1312 current_token->duration[end_state]));
1313 to_node_index = fsm_arc->to_node;
1314 new_ftoken_index = rec->best_token_for_node[to_node_index];
1315 if (new_ftoken_index == MAXftokenID)
1316 {
1317 /*we need to make sure there is room in the new_states array
1318 and there are free state tokens*/
1319 if (rec->fsmnode_token_freelist == MAXftokenID)
1320 {
1321 /*make sure there is room for another FSMnode token
1322 - if not, prune until there is room*/
1323 current_prune_delta = reprune_fsmnode_tokens(rec, best_cost, current_prune_delta, MAXftokenID);
1324 current_prune_thresh = best_cost + current_prune_delta;
1325 }
1326
1327 /*because of the above check, this should always succeed*/
1328 new_ftoken_index = get_free_fsmnode_token(rec, EXIT_IF_NO_TOKENS);
1329
1330 ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
1331 ftoken->FSMnode_index = to_node_index;
1332 ftoken->next_token_index = rec->active_fsmnode_tokens;
1333 ftoken->cost = end_cost;
1334 ftoken->word_backtrace = current_token->word_backtrace[end_state];
1335 ftoken->word = current_token->word[end_state];
1336 if (end_model_index == SILENCE_MODEL_INDEX && ftoken->word != rec->context->beg_silence_word)
1337 {
1338 ftoken->silence_duration = current_token->duration[end_state];
1339 }
1340 else
1341 {
1342 ftoken->silence_duration = 0;
1343 }
1344 if (ftoken->word != MAXwordID)
1345 {
1346 arcID narr;
1347 fsm_node = &rec->context->FSMnode_list[ fsm_arc->to_node];
1348 /* when there is only one arc arriving, a refcopy is good enough
1349 and saves memory */
1350 narr = (arcID) GetNumArcsArrivingClip2(rec->context, fsm_node);
1351 if (narr > 1)
1352 ftoken->aword_backtrace = copy_altwords(rec, current_token->aword_backtrace[end_state], 0);
1353 else
1354 ftoken->aword_backtrace = refcopy_altwords(rec, current_token->aword_backtrace[end_state]);
1355 }
1356 else
1357 {
1358 /* if there's no top choice, there shouldn't be alternatives! */
1359 ASSERT(current_token->aword_backtrace[end_state] == AWTNULL);
1360 ftoken->aword_backtrace = AWTNULL;
1361 }
1362 rec->active_fsmnode_tokens = new_ftoken_index;
1363 rec->best_token_for_node[to_node_index] = new_ftoken_index;
1364 }
1365 else /* a token already exists, use it! */
1366 {
1367
1368 ftoken = &(rec->fsmnode_token_array[new_ftoken_index]);
1369 ASSERT( ((current_token->word[end_state] == MAXwordID) && (ftoken->word == MAXwordID))
1370 || ((current_token->word[end_state] != MAXwordID) && (ftoken->word != MAXwordID)) );
1371
1372 /* this is a hack for preferring the shorter of the backtrace words
1373 when scores are equal, used to prefer longer pau2 word */
1374 end_cost_equality_hack = 0;
1375 if (end_cost == ftoken->cost)
1376 {
1377 if (current_token->word_backtrace[end_state] != ftoken->word_backtrace
1378 && current_token->word_backtrace[end_state] != MAXwtokenID)
1379 {
1380 frameID ct_end_time = MAXframeID, et_end_time = 0;
1381 if (current_token->word_backtrace[end_state] != MAXwtokenID)
1382 ct_end_time = rec->word_token_array[current_token->word_backtrace[end_state]].end_time;
1383 if (ftoken->word_backtrace != MAXwtokenID)
1384 et_end_time = rec->word_token_array[ftoken->word_backtrace].end_time;
1385 if (ct_end_time < et_end_time)
1386 end_cost_equality_hack = 1;
1387 }
1388 }
1389
1390 if (end_cost < ftoken->cost || end_cost_equality_hack)
1391 {
1392 /* new one coming in is better, so push the current state down */
1393 /* ftoken info goes into awtoken */
1394 if (ftoken->word != MAXwordID)
1395 {
1396 /* copy_altwords() */
1397 awtoken = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
1398 if (awtoken != AWTNULL)
1399 {
1400 awtoken->costdelta = ftoken->cost - end_cost;
1401 awtoken->word_backtrace = ftoken->word_backtrace;
1402 awtoken->word = ftoken->word;
1403
1404 /* ensure full ownership! */
1405 q = ftoken->aword_backtrace;
1406 if (q != AWTNULL && q->refcount > 1)
1407 {
1408 awtoken->next_token = copy_altwords(rec, ftoken->aword_backtrace, ftoken->cost - end_cost);
1409 free_altword_token_batch(rec, ftoken->aword_backtrace);
1410 /* reversed order above here !! */
1411 }
1412 else
1413 {
1414 awtoken->next_token = ftoken->aword_backtrace;
1415 count_altword_token( rec, awtoken);
1416 for (q = awtoken->next_token; q; q = q->next_token)
1417 q->costdelta += ftoken->cost - end_cost;
1418 }
1419 ftoken->aword_backtrace = awtoken;
1420 ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace);
1421 if( (q=ftoken->aword_backtrace)!=AWTNULL) {
1422 for (q = ftoken->aword_backtrace; q->next_token; q = q->next_token) ;
1423 q->next_token = copy_altwords(rec, current_token->aword_backtrace[end_state], 0);
1424 ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace);
1425 /* awtoken->costbasis = &ftoken->cost; */
1426 ftoken->aword_backtrace->refcount = 1;
1427 }
1428 }
1429 }
1430 else
1431 {
1432 /* if there's no top choice, there shouldn't be alternatives! */
1433 ASSERT(ftoken->aword_backtrace == AWTNULL);
1434 }
1435 /* and stoken info goes into ftoken */
1436 ftoken->cost = end_cost;
1437 ftoken->word_backtrace = current_token->word_backtrace[end_state];
1438 ftoken->word = current_token->word[end_state];
1439 if (end_model_index == SILENCE_MODEL_INDEX && ftoken->word != rec->context->beg_silence_word)
1440 {
1441 ftoken->silence_duration = current_token->duration[end_state];
1442 }
1443 else
1444 {
1445 ftoken->silence_duration = 0;
1446 }
1447 }
1448 else
1449 {
1450 /* new arc arriving is worse */
1451 /* print_fsmarc_token(rec, current_token_index, "new_arc_arriving worse");
1452 print_fsmnode_token(rec, new_ftoken_index, "new_arc_arriving tonode");*/
1453 /* append it to the alt list */
1454 /* stoken info goes into the awtoken, ftoken unchanged */
1455 if (ftoken->word != MAXwordID)
1456 {
1457 /* copy_altwords() */
1458 awtoken = get_free_altword_token(rec, NULL_IF_NO_TOKENS);
1459 if (awtoken != AWTNULL)
1460 {
1461 awtoken->costdelta = end_cost - ftoken->cost;
1462 awtoken->word = current_token->word[end_state];
1463 awtoken->word_backtrace = current_token->word_backtrace[end_state];
1464
1465 if (current_token->aword_backtrace[end_state] != AWTNULL)
1466 awtoken->next_token = copy_altwords(rec,
1467 current_token->aword_backtrace[end_state],
1468 awtoken->costdelta);
1469 else
1470 awtoken->next_token = AWTNULL;
1471
1472 /* ensure full ownership!, this is new here! */
1473 q = ftoken->aword_backtrace;
1474 if (q != AWTNULL && q->refcount > 1)
1475 {
1476 q = copy_altwords(rec, ftoken->aword_backtrace, 0);
1477 free_altword_token_batch(rec, ftoken->aword_backtrace);
1478 ftoken->aword_backtrace = q;
1479 }
1480 }
1481 if (ftoken->aword_backtrace)
1482 {
1483 for (q = ftoken->aword_backtrace; q->next_token; q = q->next_token) ;
1484 q->next_token = awtoken;
1485 }
1486 else
1487 {
1488 ftoken->aword_backtrace = awtoken;
1489 }
1490 if (ftoken->aword_backtrace!=AWTNULL) {
1491 ftoken->aword_backtrace->refcount = 1;
1492 ftoken->aword_backtrace = sizewise_altwords(rec, ftoken->aword_backtrace);
1493 }
1494 }
1495 }
1496 /*print_fsmnode_token(rec, new_ftoken_index, "123e reused-token ");*/
1497 }
1498 ilabel = rec->context->FSMnode_info_list[ ftoken->FSMnode_index];
1499 ASSERT(ilabel < NODE_INFO_NUMS);
1500 if (ftoken->cost < rec->current_best_ftoken_cost[ilabel])
1501 {
1502 rec->current_best_ftoken_cost[ilabel] = ftoken->cost;
1503 rec->current_best_ftoken_index[ilabel] = new_ftoken_index;
1504 }
1505 if (ftoken->cost < rec->current_best_ftoken_cost[NODE_INFO_UNKNOWN])
1506 {
1507 rec->current_best_ftoken_cost[NODE_INFO_UNKNOWN] = ftoken->cost;
1508 rec->current_best_ftoken_index[NODE_INFO_UNKNOWN] = new_ftoken_index;
1509 }
1510 ASSERT(ftoken->word != MAXwordID || ftoken->aword_backtrace == AWTNULL);
1511 }
1512 current_token_index = current_token->next_token_index;
1513 }
1514 sanity_check_altwords(rec, rec->altword_token_freelist);
1515 return num_fsmnode_updates;
1516 }
1517
update_from_current_fsm_nodes_into_new_HMMs(srec * rec,costdata * pcurrent_prune_delta,costdata * pcurrent_best_cost,costdata * precomputed_model_scores)1518 static int update_from_current_fsm_nodes_into_new_HMMs(srec* rec,
1519 costdata *pcurrent_prune_delta,
1520 costdata *pcurrent_best_cost,
1521 costdata *precomputed_model_scores)
1522 {
1523 costdata prev_cost;
1524 FSMnode* fsm_node;
1525 FSMarc* fsm_arc;
1526 arcID fsm_arc_index;
1527 HMMInfo *hmm_info;
1528 modelID model_index;
1529 fsmarc_token *token;
1530 stokenID new_token_index = MAXstokenID;
1531 costdata cost;
1532 costdata current_prune_thresh;
1533 costdata current_prune_delta = *pcurrent_prune_delta;
1534 costdata current_best_cost = *pcurrent_best_cost;
1535 ftokenID ftoken_index;
1536 ftokenID old_ftoken_index;
1537 fsmnode_token *fsmnode_token;
1538 int num_fsm_nodes_updated = 0;
1539 costdata orig_prune_delta;
1540
1541 ftoken_index = rec->active_fsmnode_tokens;
1542
1543 current_prune_thresh = *pcurrent_best_cost + *pcurrent_prune_delta;
1544 orig_prune_delta = *pcurrent_prune_delta;
1545
1546 sanity_check_altwords(rec, rec->altword_token_freelist);
1547
1548 while (ftoken_index != MAXftokenID)
1549 {
1550 fsmnode_token = &rec->fsmnode_token_array[ftoken_index];
1551
1552 prev_cost = fsmnode_token->cost; /*get last state of token*/
1553 if (fsmnode_token->FSMnode_index == rec->context->end_node)
1554 {
1555 prev_cost = MAXcostdata;
1556 }
1557
1558 if (prev_cost < current_prune_thresh)
1559 {
1560 num_fsm_nodes_updated++;
1561
1562 fsm_node = &rec->context->FSMnode_list[fsmnode_token->FSMnode_index];
1563
1564 /* loop over arcs leaving this fsm_node */
1565 for (fsm_arc_index = fsm_node->un_ptr.first_next_arc;
1566 fsm_arc_index != MAXarcID;
1567 fsm_arc_index = fsm_arc->linkl_next_arc)
1568 {
1569 labelID ilabel;
1570 wordID olabel;
1571 nodeID nextnode;
1572
1573 fsm_arc = &rec->context->FSMarc_list[ fsm_arc_index];
1574
1575 ilabel = fsm_arc->ilabel;
1576 olabel = fsm_arc->olabel;
1577 nextnode = fsm_arc->to_node;
1578
1579 if (ilabel >= EPSILON_OFFSET)
1580 {
1581 /*so, not an epsilon arc*/
1582 hmm_info = &rec->context->hmm_info_for_ilabel[ilabel];
1583
1584 model_index = hmm_info->state_indices[0];
1585
1586 cost = prev_cost + precomputed_model_scores[model_index];
1587 cost = (costdata)(cost + (costdata) fsm_arc->cost);
1588
1589 if (cost < current_prune_thresh)
1590 {
1591 /*new node to keep*/
1592
1593 /* look for the fsmarc_token* token, into which to maximize, else create new one */
1594 if (rec->best_token_for_arc[fsm_arc_index] == MAXstokenID)
1595 {
1596
1597 /*make sure there is room for another state token - if not, prune
1598 until there is room*/
1599 /*we need to make sure there is room in the new_states array and
1600 there are free state tokens*/
1601 if (rec->fsmarc_token_freelist == MAXstokenID)
1602 {
1603 current_prune_delta = reprune_new_states(rec, current_best_cost, current_prune_delta);
1604 }
1605
1606 /* because of the above check, this should always succeed */
1607 new_token_index = setup_free_fsmarc_token(rec, fsm_arc, fsm_arc_index, EXIT_IF_NO_TOKENS);
1608
1609 token = &(rec->fsmarc_token_array[new_token_index]);
1610
1611 token->next_token_index = rec->active_fsmarc_tokens;
1612 rec->active_fsmarc_tokens = new_token_index;
1613 rec->num_new_states++;
1614
1615 rec->best_token_for_arc[fsm_arc_index] = new_token_index;
1616 token->cost[0] = MAXcostdata;
1617 }
1618 else
1619 {
1620 new_token_index = rec->best_token_for_arc[fsm_arc_index];
1621 token = &(rec->fsmarc_token_array[ new_token_index]);
1622 }
1623
1624 if (cost < token->cost[0])
1625 {
1626 token->cost[0] = cost;
1627 token->duration[0] = 1;
1628 token->word_backtrace[0] = fsmnode_token->word_backtrace;
1629 if (token->aword_backtrace[0] != AWTNULL)
1630 free_altword_token_batch(rec, token->aword_backtrace[0]);
1631 token->aword_backtrace[0] = AWTNULL;
1632 token->aword_backtrace[0] = refcopy_altwords(rec, fsmnode_token->aword_backtrace);
1633
1634 if (olabel != WORD_EPSILON_LABEL)
1635 {
1636 token->word[0] = olabel;
1637 //ASSERT(token->aword_backtrace[0] == AWTNULL);
1638 }
1639 else
1640 {
1641 token->word[0] = fsmnode_token->word;
1642 }
1643 ASSERT(token->word[0] != MAXwordID
1644 || token->aword_backtrace[0] == AWTNULL);
1645 if (cost < current_best_cost)
1646 {
1647 current_best_cost = cost;
1648 current_prune_delta = orig_prune_delta; /*if we have a new best cost, the prune delta could go back up*/
1649 current_prune_thresh = cost + current_prune_delta;
1650 ASSERT((float)cost + (float)current_prune_delta < (float)USHRT_MAX);
1651 }
1652 }
1653 }
1654 }
1655 }
1656 }
1657 rec->best_token_for_node[fsmnode_token->FSMnode_index] = MAXftokenID; /*done with this node - remove it from the array*/
1658 old_ftoken_index = ftoken_index;
1659
1660 ftoken_index = fsmnode_token->next_token_index;
1661 free_fsmnode_token(rec, old_ftoken_index); /*done with this node - free the token*/
1662 rec->active_fsmnode_tokens = ftoken_index; /*needed for sanity_check_altwords*/
1663 }
1664 /*done with all the tokens, set active tokens to NULL*/
1665 rec->active_fsmnode_tokens = MAXftokenID;
1666 sanity_check_altwords(rec, rec->altword_token_freelist);
1667
1668 *pcurrent_best_cost = current_best_cost;
1669 *pcurrent_prune_delta = current_prune_delta;
1670
1671 return num_fsm_nodes_updated;
1672 }
1673
1674 #if USE_COMP_STATS
start_front_end_clock(void)1675 void start_front_end_clock(void)
1676 {
1677 if (!comp_stats)
1678 comp_stats = init_comp_stats();
1679 start_cs_clock(&comp_stats->front_end);
1680 }
stop_front_end_clock(void)1681 void stop_front_end_clock(void)
1682 {
1683 end_cs_clock(&comp_stats->front_end, 1);
1684 }
1685 #endif
1686
1687
1688 /*---------------------------------------------------------------------------*
1689 * *
1690 * begin and end *
1691 * *
1692 *---------------------------------------------------------------------------*/
1693
1694 /*gets things started for the viterbi search - sets up things for frame 0*/
1695
srec_begin(srec * rec,int begin_syn_node)1696 int srec_begin(srec *rec, int begin_syn_node)
1697 {
1698 FSMnode* fsm_node;
1699 fsmnode_token *token;
1700 stokenID new_token_index;
1701 nodeID node_index;
1702 arcID arc_index;
1703
1704 if (!rec || !rec->context)
1705 {
1706 log_report("Error: bad inputs to srec_begin()\n");
1707 return 1;
1708 }
1709 if (!rec->context->whether_prepared)
1710 {
1711 log_report("srec_begin: Grammar not prepared. Compiling!\n");
1712 FST_PrepareContext(rec->context);
1713
1714 if (!rec->context->whether_prepared)
1715 {
1716 PLogError("ESR_INVALID_STATE: Grammar can not be compiled (FST_PrepareContext failed)");
1717 return ESR_INVALID_STATE ;
1718 }
1719 }
1720
1721 #if USE_COMP_STATS
1722 if (comp_stats == NULL)
1723 comp_stats = init_comp_stats();
1724 #endif
1725 /*initialize token storage - not clear we really need this - as long as they
1726 are managed correctly, we should be able to do this on startup - not each utt*/
1727 initialize_free_fsmarc_tokens(rec);
1728 initialize_free_word_tokens(rec);
1729 initialize_free_fsmnode_tokens(rec);
1730 initialize_word_lattice(rec->word_lattice);
1731 initialize_free_altword_tokens(rec);
1732
1733 if (rec->context->num_nodes > rec->max_fsm_nodes)
1734 {
1735 log_report("Error: srec_begin failing due to too many grammar nodes\n");
1736 return 1;
1737 }
1738 for (node_index = 0;node_index < rec->context->num_nodes;node_index++)
1739 {
1740 rec->best_token_for_node[node_index] = MAXftokenID;
1741 }
1742 if (rec->context->num_arcs > rec->max_fsm_arcs)
1743 {
1744 log_report("Error: srec_begin failing due to too many grammar arcs\n");
1745 return 1;
1746 }
1747 for (arc_index = 0;arc_index < rec->context->num_arcs;arc_index++)
1748 {
1749 rec->best_token_for_arc[arc_index] = MAXstokenID;
1750 }
1751 rec->srec_ended = 0;
1752 rec->num_new_states = 0;
1753 rec->current_best_cost = 0;
1754 rec->current_prune_delta = rec->prune_delta;
1755
1756 /*need help from johan - does ths FSM only have one start node?
1757 Which one is it? assume just one and it is node 0*/
1758
1759 fsm_node = &rec->context->FSMnode_list[ rec->context->start_node];
1760 node_index = (nodeID) rec->context->start_node;
1761 /* node_index is still 0 at this point */
1762
1763 /*now we just need to setup an initial fsmnode token (for begin FSM node) and then do epsilon updates*/
1764
1765 rec->active_fsmarc_tokens = MAXstokenID;
1766
1767 new_token_index = get_free_fsmnode_token(rec, EXIT_IF_NO_TOKENS);
1768
1769 token = &(rec->fsmnode_token_array[new_token_index]);
1770 token->word_backtrace = MAXwtokenID; /* real value set below*/
1771 token->cost = 0;
1772 token->word = MAXwordID;
1773 token->FSMnode_index = node_index;
1774 token->next_token_index = MAXftokenID;
1775 token->aword_backtrace = AWTNULL;
1776
1777 rec->best_token_for_node[node_index] = new_token_index;
1778 rec->active_fsmnode_tokens = new_token_index;
1779 rec->current_search_frame = 0;
1780
1781 do_epsilon_updates(rec, rec->prune_delta, 0);
1782 return 0;
1783 }
1784
srec_force_the_end(srec * rec,frameID end_frame,wordID end_word)1785 void srec_force_the_end(srec* rec, frameID end_frame, wordID end_word)
1786 {
1787 srec_word_lattice* wl = rec->word_lattice;
1788 wtokenID wtoken_index, tmp;
1789 frameID frame;
1790 wtoken_index = wl->words_for_frame[end_frame];
1791 if (wtoken_index == MAXwtokenID)
1792 {
1793 for (frame = end_frame - 1; frame > 20; frame--)
1794 {
1795 if (wl->words_for_frame[frame] != MAXwtokenID)
1796 {
1797 word_token* wtoken;
1798 wl->words_for_frame[end_frame] = wl->words_for_frame[frame];
1799 wl->words_for_frame[frame] = MAXwtokenID;
1800 for (tmp = wl->words_for_frame[end_frame]; tmp != MAXwtokenID;
1801 tmp = wtoken->next_token_index)
1802 {
1803 wtoken = &rec->word_token_array[tmp];
1804 wtoken->end_time = frame;
1805 wtoken->word = end_word;
1806 wtoken->end_node = rec->context->end_node;
1807 }
1808 #ifdef _WIN32
1809 PLogError(L("Forced an end path at end frame %d/%d)\n"), frame, end_frame);
1810 #endif
1811 break;
1812 }
1813 }
1814 }
1815 }
1816
1817 /* when there are no more frames of input, this functions
1818 kills all paths not ending at the end node and
1819 creates a word linked list even though there is no WORD_BOUNDARY ilabel */
1820
srec_no_more_frames(srec * rec)1821 void srec_no_more_frames(srec* rec)
1822 {
1823 #if USE_COMP_STATS
1824 frameID end_frame = rec->current_search_frame;
1825 #endif
1826 nodeID end_node;
1827 fsmnode_token* ftoken;
1828 ftokenID current_token_index;
1829 costdata current_word_threshold = MAXcostdata;
1830 wtokenID word_token_index;
1831 int any_nodes_blocked = 0;
1832 altword_token* awtoken;
1833
1834 /* this is just for sanity checking, to find out what the state was
1835 at the end of input */
1836 srec_check_end_of_speech_end(rec);
1837
1838 if (rec->srec_ended) return;
1839 rec->srec_ended = 1;
1840
1841 #if USE_COMP_STATS
1842 comp_stats->total_time += (float)(end_frame / 50.0f);
1843 dump_comp_stats(comp_stats, PSTDOUT);
1844 #endif
1845
1846 end_node = rec->context->end_node;
1847 /*remove all word paths from the priority_q which do not end at end_node
1848 to make space for those being added below */
1849 remove_non_end_word_from_q(rec, rec->word_priority_q, rec->word_token_array,
1850 end_node);
1851
1852 if (rec->current_search_frame == 0)
1853 return;
1854
1855 rec->accumulated_cost_offset[ rec->current_search_frame] =
1856 rec->accumulated_cost_offset[ rec->current_search_frame-1];
1857 rec->cost_offset_for_frame[ rec->current_search_frame] = 0;
1858
1859 /* watch out if using the best_token_for_node[] array here
1860 is it valid? not if multiple recognizers, maybe we
1861 should remember best_token_for_end_node separately */
1862
1863 current_token_index = rec->active_fsmnode_tokens;
1864 while (current_token_index != MAXftokenID)
1865 {
1866 ftoken = &rec->fsmnode_token_array[current_token_index];
1867 if (ftoken->FSMnode_index == end_node)
1868 {
1869 /* print_fsmnode_token(rec, current_token_index, "fsmnode_token at end_node "); */
1870 word_token_index = srec_process_word_boundary_nbest(rec,
1871 ftoken->FSMnode_index,
1872 ftoken->word,
1873 ftoken->word_backtrace,
1874 ftoken->cost, ¤t_word_threshold, &any_nodes_blocked);
1875 if (word_token_index != MAXwtokenID)
1876 {
1877 WORD_TOKEN_SET_WD_ETIME( (rec->word_token_array+word_token_index),
1878 rec->word_token_array[word_token_index].end_time - ftoken->silence_duration);
1879 }
1880 /* now also dump alternatives at this last frame, sep19'03 fixed */
1881 awtoken = ftoken->aword_backtrace;
1882 for (; awtoken != AWTNULL; awtoken = awtoken->next_token)
1883 {
1884 srec_process_word_boundary_nbest(rec,
1885 ftoken->FSMnode_index,
1886 awtoken->word,
1887 awtoken->word_backtrace,
1888 ftoken->cost + awtoken->costdelta,
1889 ¤t_word_threshold,
1890 &any_nodes_blocked);
1891 }
1892 }
1893 current_token_index = ftoken->next_token_index;
1894 }
1895
1896 /* we clobber the word_lattice at the last frame that was created
1897 in do_epsilon_updates() */
1898 word_token_index = get_word_token_list(rec->word_priority_q, rec->word_token_array);
1899 lattice_add_word_tokens(rec->word_lattice, rec->current_search_frame, word_token_index);
1900
1901 if (FST_IsVoiceEnrollment(rec->context) && word_token_index == MAXwtokenID)
1902 {
1903 srec_force_the_end(rec, rec->current_search_frame, rec->context->end_silence_word);
1904 }
1905
1906 /* find the current_best_cost for this recognizer ... at the end node,
1907 it will be used to decide which recognizer wins! */
1908 rec->current_best_cost = lattice_best_cost_to_frame(rec->word_lattice,
1909 rec->word_token_array,
1910 rec->current_search_frame);
1911
1912 }
1913
srec_terminate(srec * rec)1914 void srec_terminate(srec* rec)
1915 {
1916 frameID ifr;
1917 stokenID stoken_index, next_stoken_index;
1918 fsmarc_token* stoken;
1919 ftokenID ftoken_index, next_ftoken_index;
1920 fsmnode_token* ftoken;
1921 wtokenID wtoken_index, next_wtoken_index;
1922 word_token* wtoken;
1923
1924 /* release all state tokens */
1925 for (stoken_index = rec->active_fsmarc_tokens; stoken_index != MAXstokenID;
1926 stoken_index = next_stoken_index)
1927 {
1928 stoken = &rec->fsmarc_token_array[ stoken_index];
1929 next_stoken_index = stoken->next_token_index;
1930 free_fsmarc_token(rec, stoken_index);
1931 }
1932 rec->active_fsmarc_tokens = MAXstokenID;
1933
1934 /* release all fsmnode tokens */
1935 for (ftoken_index = rec->active_fsmnode_tokens; ftoken_index != MAXftokenID;
1936 ftoken_index = next_ftoken_index)
1937 {
1938 ftoken = &rec->fsmnode_token_array[ ftoken_index];
1939 next_ftoken_index = ftoken->next_token_index;
1940 free_fsmnode_token(rec, ftoken_index);
1941 }
1942 rec->active_fsmnode_tokens = MAXftokenID;
1943
1944 /* release all word tokens */
1945 for (ifr = 0; ifr < rec->current_search_frame; ifr++)
1946 {
1947 for (wtoken_index = rec->word_lattice->words_for_frame[ifr];
1948 wtoken_index != MAXwtokenID; wtoken_index = next_wtoken_index)
1949 {
1950 wtoken = &rec->word_token_array[wtoken_index];
1951 next_wtoken_index = wtoken->next_token_index;
1952 free_word_token(rec, wtoken_index);
1953 }
1954 rec->word_lattice->words_for_frame[ifr] = MAXwtokenID;
1955 }
1956 rec->current_model_scores[SILENCE_MODEL_INDEX] = DO_NOT_COMPUTE_MODEL;
1957 rec->current_best_cost = MAXcostdata;
1958 rec->srec_ended = 1;
1959 }
1960 /*------------------------------------------------------------------------*
1961 * *
1962 * main work of the viterbi search *
1963 * *
1964 *------------------------------------------------------------------------*/
1965
1966 /*with new update to FSM node scheme, the sequence of operation is:
1967
1968 for each frame:
1969
1970 1. Handle all internal HMM updates based on new frame observations. This is
1971 done in place with the current list of HMM tokens.
1972
1973 2. For each current active FSM node (from previous frame), activate update
1974 into state 0 (either for existing HMM tokens or for new HMM tokens) by going
1975 through an observation frame (so, only go from an FSM node to a new HMM
1976 token if the first observation frame gets a score above the current pruning
1977 threshold). FSM nodes are freed as this is done. So, no FSMnode tokens are left
1978 at the end of this.
1979
1980 3. Prune. Note that the best score will have already been established for
1981 this frame (so therefore the pruning threshold will not change).
1982
1983 4. reset best cost to 0 (to keep scores in range). We can do this here since we already know the best score.
1984
1985 5. For end hmm states which are above the pruning threshold, create new
1986 FSMnode_tokens.
1987
1988 6. update epsilons, including word boundary arcs (which put words onto the word lattice).
1989 epsilon updates go from FSM node to FSM node.
1990
1991 repeat for next frame based on new FSM nodes and current HMMs
1992
1993 */
1994
1995 void srec_viterbi_part1(srec *rec,
1996 const SWIModel *acoustic_models,
1997 pattern_info *pattern,
1998 costdata silence_model_cost);
1999
2000 void srec_viterbi_part2(srec *rec);
2001
multi_srec_viterbi(multi_srec * recm,srec_eos_detector_parms * eosd,pattern_info * pattern,utterance_info * utt_not_used)2002 int multi_srec_viterbi(multi_srec *recm,
2003 srec_eos_detector_parms* eosd,
2004 pattern_info *pattern,
2005 utterance_info* utt_not_used)
2006 {
2007 EOSrc eosrc1 = SPEECH_ENDED, eosrc2 = SPEECH_ENDED;
2008 #if DO_ALLOW_MULTIPLE_MODELS
2009 ASSERT(recm->num_activated_recs == recm->num_swimodels);
2010 if (recm->num_activated_recs == 1)
2011 {
2012 #endif
2013 srec* rec1 = &recm->rec[0];
2014 #if USE_COMP_STATS
2015 start_cs_clock1(&comp_stats->overall_search);
2016 #endif
2017 if (rec1->current_search_frame >= (rec1->word_lattice->max_frames - 1))
2018 return 1;
2019 srec_viterbi_part1(&recm->rec[0], recm->swimodel[0], pattern, DO_NOT_COMPUTE_MODEL);
2020 reset_best_cost_to_zero(rec1, rec1->current_best_cost);
2021 reset_cost_offsets(recm, rec1->current_search_frame, rec1->current_best_cost);
2022 rec1->current_prune_delta = rec1->prune_delta;
2023 rec1->current_best_cost = 0;
2024 srec_viterbi_part2(&recm->rec[0]);
2025 eosrc1 = srec_check_end_of_speech(eosd, &recm->rec[0]);
2026 #if USE_COMP_STATS
2027 end_cs_clock1(&comp_stats->overall_search, 1);
2028 #endif
2029
2030 SREC_STATS_UPDATE(&recm->rec[0]);
2031 recm->eos_status = eosrc1;
2032 #if DO_ALLOW_MULTIPLE_MODELS
2033 }
2034 else if (recm->num_activated_recs == 2)
2035 {
2036 srec* rec1 = &recm->rec[0];
2037 srec* rec2 = &recm->rec[1];
2038 const SWIModel* acoustic_models1 = recm->swimodel[0];
2039 const SWIModel* acoustic_models2 = recm->swimodel[1];
2040 costdata diff;
2041 costdata current_best_cost;
2042
2043 ASSERT(rec1->prune_delta == rec2->prune_delta);
2044 /* in part 1 we need to operate by adjusting the prune delta, 'cuz we want
2045 to operate on scores after consumption of a frame */
2046 if ((rec1->current_best_cost > MAXcostdata / 2 && !rec1->srec_ended) ||
2047 (rec2->current_best_cost > MAXcostdata / 2 && !rec2->srec_ended))
2048 {
2049 printf("hey %d %d\n", rec1->current_best_cost, rec2->current_best_cost);
2050 }
2051
2052 /* figure out the prune_delta for the different genders, we
2053 want that pruning should be joint (i.e. prune male and
2054 female relative to overall best). Before part1 we don't
2055 yet know the overall best, so we use the gender score gap
2056 from the last frame, and make the prune the worse gender
2057 accordingly more aggressive */
2058
2059 if (!rec2->srec_ended && rec1->current_best_cost < rec2->current_best_cost)
2060 {
2061 diff = rec2->current_best_cost - rec1->current_best_cost;
2062 if (rec2->current_search_frame >= (rec2->word_lattice->max_frames - 1))
2063 {
2064 return 1;
2065 }
2066 if (diff > rec2->prune_delta)
2067 {
2068 srec_terminate(rec2);
2069 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2070 PLogMessage("T: terminate_viterbi(rec2) @%d", rec2->current_search_frame);
2071 #endif
2072 }
2073 else
2074 rec2->current_prune_delta = rec2->prune_delta - diff;
2075 rec1->current_prune_delta = rec1->prune_delta;
2076 }
2077 else if (!rec1->srec_ended)
2078 {
2079 if (rec1->current_search_frame >= (rec1->word_lattice->max_frames - 1))
2080 {
2081 return 1;
2082 }
2083 diff = rec1->current_best_cost - rec2->current_best_cost;
2084 if (diff > rec1->prune_delta)
2085 {
2086 srec_terminate(rec1);
2087 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2088 PLogMessage("T: terminate_viterbi(rec1) @%d", rec1->current_search_frame);
2089 #endif
2090 }
2091 else
2092 rec1->current_prune_delta = rec1->prune_delta - diff;
2093 rec2->current_prune_delta = rec2->prune_delta;
2094 }
2095
2096 /* now run part1 for each gender */
2097 if (!rec1->srec_ended)
2098 {
2099 srec_viterbi_part1(rec1, acoustic_models1, pattern, DO_NOT_COMPUTE_MODEL);
2100 SREC_STATS_UPDATE(rec1);
2101 }
2102
2103 if (!rec2->srec_ended)
2104 {
2105 srec_viterbi_part1(rec2, acoustic_models2, pattern, rec1->current_model_scores[SILENCE_MODEL_INDEX]);
2106 SREC_STATS_UPDATE(rec2);
2107 }
2108
2109 /* now adjust score offsets, score offsets are shared across genders */
2110
2111 if (rec1->current_best_cost <= rec2->current_best_cost)
2112 {
2113 /* am1 is winning, prune 2 harder */
2114 current_best_cost = rec1->current_best_cost;
2115 reset_cost_offsets(recm, rec1->current_search_frame, current_best_cost);
2116 }
2117 else
2118 {
2119 /* am2 is winning, prune 1 harder */
2120 current_best_cost = rec2->current_best_cost;
2121 reset_cost_offsets(recm, rec2->current_search_frame, current_best_cost);
2122 }
2123
2124 /* jean: some cleanup needed here */
2125 /** best_token_for_arc = rec1->best_token_for_arc;
2126 rec1->best_token_for_arc = 0; **/
2127 if (!rec1->srec_ended)
2128 {
2129 reset_best_cost_to_zero(rec1, current_best_cost);
2130 rec1->current_best_cost = (costdata)(rec1->current_best_cost - (costdata) current_best_cost);
2131 srec_viterbi_part2(rec1);
2132 if (rec1->active_fsmnode_tokens == MAXftokenID)
2133 srec_terminate(rec1);
2134 if (!rec1->srec_ended)
2135 eosrc1 = srec_check_end_of_speech(eosd, rec1);
2136 }
2137 /** rec1->best_token_for_arc = best_token_for_arc;
2138 best_token_for_arc = rec2->best_token_for_arc;
2139 rec2->best_token_for_arc = 0; **/
2140 if (!rec2->srec_ended)
2141 {
2142 reset_best_cost_to_zero(rec2, current_best_cost);
2143 rec2->current_best_cost = (costdata)(rec2->current_best_cost - (costdata) current_best_cost);
2144 srec_viterbi_part2(rec2);
2145 if (rec2->active_fsmnode_tokens == MAXftokenID)
2146 srec_terminate(rec2);
2147 if (!rec2->srec_ended)
2148 eosrc2 = srec_check_end_of_speech(eosd, rec2);
2149 }
2150 /** rec2->best_token_for_arc = best_token_for_arc; **/
2151 SREC_STATS_UPDATE(rec1);
2152 SREC_STATS_UPDATE(rec2);
2153 recm->eos_status = eosrc1;
2154 if (rec1->current_best_cost > rec2->current_best_cost)
2155 recm->eos_status = eosrc2;
2156 }
2157 #endif
2158 return 0;
2159 }
2160
2161
srec_viterbi_part1(srec * rec,const SWIModel * acoustic_models,pattern_info * pattern,costdata silence_model_cost)2162 void srec_viterbi_part1(srec *rec,
2163 const SWIModel *acoustic_models,
2164 pattern_info *pattern,
2165 costdata silence_model_cost)
2166 {
2167 costdata current_best_cost;
2168 /* costdata current_prune_thresh; */
2169 costdata current_prune_delta;
2170 /* the score difference for pruning - can get adjusted below if
2171 pruning gets tighted to keep array sizes in check*/
2172 costdata *current_model_scores;
2173 int num_models_computed;
2174 nodeID num_fsm_nodes_updated;
2175
2176 #if USE_COMP_STATS
2177 start_cs_clock(&comp_stats->models);
2178 #endif
2179
2180 /*first go ahead and compute scores for all models which are needed by the search at this point*/
2181
2182
2183 find_which_models_to_compute(rec, acoustic_models);
2184 /* communication happens via rec->current_model_scores */
2185 #define SCORE_FIRST_SILENCE_ONLY
2186 #ifdef SCORE_FIRST_SILENCE_ONLY
2187 if (silence_model_cost != DO_NOT_COMPUTE_MODEL)
2188 rec->current_model_scores[SILENCE_MODEL_INDEX] = silence_model_cost;
2189 #endif
2190 num_models_computed = compute_model_scores(rec->current_model_scores, acoustic_models, pattern, rec->current_search_frame);
2191 rec->best_model_cost_for_frame[rec->current_search_frame] = best_uint16(rec->current_model_scores, acoustic_models->num_hmmstates);
2192
2193 #if USE_COMP_STATS
2194 end_cs_clock(&comp_stats->models, num_models_computed);
2195 start_cs_clock(&comp_stats->internal_hmm);
2196 #endif
2197
2198 /*get some things out of the rec structure to make things a bit faster*/
2199 current_model_scores = rec->current_model_scores;
2200
2201 /*update search to next frame*/
2202 current_best_cost = MAXcostdata - ((costdata)2) * rec->prune_delta; /*to avoid overflows, must clean up later */
2203 /* current_prune_thresh = MAXcostdata; */
2204 current_prune_delta = rec->current_prune_delta;
2205
2206 /* srec_stats_update(rec, "(...0) "); */
2207 /*------------------------------------------------------------------------*
2208 1. Handle all internal HMM updates based on new frame observations. This is
2209 done in place with the current list of HMM tokens.
2210 *------------------------------------------------------------------------*/
2211
2212 update_internal_hmm_states(rec, ¤t_prune_delta, ¤t_best_cost, current_model_scores);
2213
2214 /* check_if_any_token_better_than_best_cost(rec, rec->active_fsmarc_tokens, current_best_cost, "after update into new");*/
2215
2216 #if USE_COMP_STATS
2217 end_cs_clock(&comp_stats->internal_hmm, rec->num_new_states);
2218 start_cs_clock(&comp_stats->fsm_to_hmm);
2219 #endif
2220
2221 /* srec_stats_update(rec, "(...1) "); */
2222 /*------------------------------------------------------------------------*
2223 2. For each current active FSM node (from previous frame), activate update
2224 into state 0 (either for existing HMM tokens or for new HMM tokens) by going
2225 through an observation frame (so, only go from an FSM node to a new HMM
2226 token if the first observation frame gets a score above the current pruning
2227 threshold). FSM nodes are freed as this is done. So, no FSMnode tokens are left
2228 at the end of this.
2229 *------------------------------------------------------------------------*/
2230
2231 num_fsm_nodes_updated = (nodeID) update_from_current_fsm_nodes_into_new_HMMs(rec, ¤t_prune_delta, ¤t_best_cost, current_model_scores);
2232 /* srec_stats_update(rec, "(...2) "); */
2233 /*------------------------------------------------------------------------*
2234 3. Prune. Note that the best score will have already been established for
2235 this frame (so therefore the pruning threshold will not change).
2236 *------------------------------------------------------------------------*/
2237
2238 #if USE_COMP_STATS
2239 end_cs_clock(&comp_stats->fsm_to_hmm, num_fsm_nodes_updated);
2240 start_cs_clock(&comp_stats->prune);
2241 #endif
2242
2243 prune_new_tokens(rec, (costdata)(current_best_cost + current_prune_delta));
2244
2245 /* it's nice to do word token pruning here 'cuz we only need to traceback
2246 the active_fsmarc_tokens, active_fsmnode_tokens are propogated thereto */
2247
2248 reprune_word_tokens_if_necessary(rec);
2249
2250 rec->current_prune_delta = current_prune_delta;
2251 rec->current_best_cost = current_best_cost;
2252 /* srec_stats_update(rec, "(...3) "); */
2253 #if USE_COMP_STATS
2254 end_cs_clock(&comp_stats->prune, rec->num_new_states);
2255 #endif
2256 }
2257
srec_viterbi_part2(srec * rec)2258 void srec_viterbi_part2(srec *rec)
2259 {
2260 wtokenID word_token_index;
2261 nodeID inode, num_fsm_nodes_updated;
2262 costdata current_prune_delta = rec->current_prune_delta;
2263 costdata current_best_cost = rec->current_best_cost;
2264 ftokenID* ftmp;
2265 int num_updates;
2266
2267 /* first we clear the best_token_for_node array, there are no live
2268 fsmnode_tokens at this point, and we don't want leftovers from
2269 the last frame */
2270 ftmp = rec->best_token_for_node;
2271 for (inode = 0; inode < rec->context->num_nodes; inode++)
2272 *ftmp++ = MAXftokenID;
2273
2274 /*------------------------------------------------------------------------*
2275 4. reset best cost to 0 (to keep scores in range). We can do this here
2276 since we already know the best score. This is done here so that
2277 no fsmnode tokens (there are none active now) need updating. This is also
2278 done here before epsilons - that way we don't need to update the word
2279 tokens .
2280
2281 We assume this was done just before part2.
2282 *------------------------------------------------------------------------*/
2283
2284 #if USE_COMP_STATS
2285 start_cs_clock(&comp_stats->hmm_to_fsm);
2286 #endif
2287
2288 /*------------------------------------------------------------------------*
2289 5. For end hmm states which are above the pruning threshold, create new
2290 FSMnode_tokens.
2291 *------------------------------------------------------------------------*/
2292
2293 num_updates = update_from_hmms_to_fsmnodes(rec, current_prune_delta, current_best_cost);
2294 if (num_updates == 0)
2295 {
2296 num_updates = update_from_hmms_to_fsmnodes(rec, 2 * current_prune_delta, current_best_cost);
2297 SREC_STATS_INC_FORCED_UPDATES();
2298 }
2299 SREC_STATS_UPDATE(rec);
2300
2301 #if USE_COMP_STATS
2302 end_cs_clock(&comp_stats->hmm_to_fsm, rec->num_new_states);
2303 start_cs_clock(&comp_stats->epsilon);
2304 #endif
2305
2306 /* srec_stats_update(rec, "(...5) "); */
2307
2308 /*------------------------------------------------------------------------*
2309 6. update epsilons, including word boundary arcs (which put words onto the word lattice).
2310 epsilon updates go from FSM node to FSM node.
2311 *------------------------------------------------------------------------*/
2312
2313 /*clear priority_q for this frame*/
2314 clear_priority_q(rec->word_priority_q);
2315
2316 num_fsm_nodes_updated = (nodeID) do_epsilon_updates(rec, current_prune_delta, current_best_cost);
2317
2318 #if USE_COMP_STATS
2319 end_cs_clock(&comp_stats->epsilon, num_fsm_nodes_updated);
2320 #endif
2321
2322 /* srec_stats_update(rec, "(...6) "); */
2323 rec->current_search_frame++;
2324
2325 /* no need to prune again after epsilons since they add no new cost - if we
2326 add costs to epsilon arcs (at word boundaries for example), add another
2327 pruning stage */
2328
2329 word_token_index = get_word_token_list(rec->word_priority_q, rec->word_token_array);
2330 lattice_add_word_tokens(rec->word_lattice, rec->current_search_frame, word_token_index);
2331 }
2332
2333 /* get the top choice, trace it back, and find out where speech starts
2334 and ends. this is used for channel normalization */
2335
WHICH_RECOG(multi_srec * rec)2336 static srec* WHICH_RECOG(multi_srec* rec)
2337 {
2338 #if DO_ALLOW_MULTIPLE_MODELS
2339 srec* return_rec = NULL;
2340 costdata current_best_cost = MAXcostdata;
2341 int i = 0;
2342 for (i = 0; i < rec->num_activated_recs; i++)
2343 {
2344 if (current_best_cost > rec->rec[i].current_best_cost)
2345 {
2346 current_best_cost = rec->rec[i].current_best_cost;
2347 return_rec = &rec->rec[i];
2348 }
2349 }
2350 return return_rec;
2351 #else
2352 return &rec->rec[0];
2353 #endif
2354 }
2355
multi_srec_get_speech_bounds(multi_srec * recm,frameID * start_frame,frameID * end_frame)2356 void multi_srec_get_speech_bounds(multi_srec* recm, frameID* start_frame, frameID* end_frame)
2357 {
2358 frameID csf;
2359 wtokenID token_index;
2360 wordID last_word;
2361 srec* rec = WHICH_RECOG(recm);
2362
2363 *start_frame = *end_frame = 0;
2364
2365 if (!rec)
2366 return;
2367 csf = rec->current_search_frame;
2368 token_index = rec->word_lattice->words_for_frame[csf];
2369 last_word = MAXwordID;
2370 while (token_index != MAXwtokenID)
2371 {
2372 word_token* wtoken = &rec->word_token_array[token_index];
2373 word_token* next_wtoken;
2374
2375 if (wtoken->word == rec->context->beg_silence_word)
2376 {
2377 if (*start_frame == 0) *start_frame = wtoken->end_time;
2378 }
2379 if (wtoken->word == rec->context->hack_silence_word)
2380 {
2381 if (wtoken->backtrace != MAXwtokenID)
2382 {
2383 next_wtoken = &rec->word_token_array[wtoken->backtrace];
2384 if (next_wtoken->word == rec->context->beg_silence_word)
2385 *start_frame = wtoken->end_time;
2386 }
2387 }
2388
2389 if (last_word == rec->context->end_silence_word)
2390 {
2391 *end_frame = wtoken->end_time;
2392 if (wtoken->word == rec->context->hack_silence_word
2393 && wtoken->backtrace != MAXwtokenID)
2394 {
2395 next_wtoken = &rec->word_token_array[wtoken->backtrace];
2396 *end_frame = WORD_TOKEN_GET_WD_ETIME( next_wtoken);
2397 }
2398 }
2399 if (token_index == wtoken->backtrace)
2400 {
2401 /* infinite loop! */
2402 PLogError ("warning: breaking infinite loop\n");
2403 *end_frame = 0;
2404 break;
2405 }
2406 token_index = wtoken->backtrace;
2407 last_word = wtoken->word;
2408 }
2409 }
2410
multi_srec_get_eos_status(multi_srec * rec)2411 int multi_srec_get_eos_status(multi_srec* rec)
2412 {
2413 int rc;
2414 ASSERT(rec);
2415 rc = (int)rec->eos_status;
2416 if (rc < 0) rc = 0;
2417 return rc;
2418 }
2419
2420 /*
2421 ToDo List:
2422
2423 end-pointing
2424 duration
2425 channel normalization
2426 re-use and appropriate killing of word_tokens
2427 pruning fsmnode_tokens
2428 astar backward for alternative choices
2429
2430 minimized graphs and word merging
2431 Johans idea:
2432 When propagating a fsmarc_token, we need to remember the word.id when it
2433 is observed. Let's continue to use fsmarc_token->word[] to remember those.
2434 When merging 2+ fsmarc_tokens into a fsmnode_token, we need remember
2435 both histories, not just the best. All histories and maintained on a linked
2436 list, with word_token->next_token_index serving as links, somehow we also
2437 remember the cost offset from one link to the next and keep track of that.
2438 Try to create the word_token as late a possible, so as to keep usage down.
2439 The list should be sorted so that we can drop things off the end, Ie. don't
2440 need to keep all word, a max of 10 is fine cuz that's the most we'll need
2441 to drop off at a .wb anyways!
2442
2443 altwords .. working .. now cpu optimize ... ideas
2444 use only the head refcount, #define the refcopy, not a function
2445 free_altword_token_batch() should not double check for AWTNULL
2446 BUILD & BUILD_DEBUG in selected areas
2447 reprune_altword_token_batch ... change costbasis to a tag ... to say (already repruned)
2448
2449
2450
2451 endpointing
2452 at grammar prepare ...
2453 get the list of endnodes ... get the list of opendnodes
2454 ... start from the graph's endnode, walk backwards on all null or silence arcs, find the nodes which have a silence or null path to the end: those are sinknodes
2455 ... sinknodes are endnodes or opendnodes ... the sinknodesO are the sinknodes that do go to speech arcs .. the sinknodes1 are the sinknodes that do not go to any speech arcs
2456 ... walkforward all sinknodes0 through iwt arcs, those are openendnodes
2457 ... walkforward all sinknodes1 through iwt arcs, those are endnodes
2458 get the top score fsmnode_token ...
2459 ... is it on an endnode ... has this been the top choice for the last 30 frames
2460 ... is it on an optional endnode ... has this neen the top choice for the last 50 frames?
2461
2462 */
2463