1 /*---------------------------------------------------------------------------*
2 * word_lattice.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20 #ifndef _RTT
21 #include "pstdio.h"
22 #endif
23 #include <stdlib.h>
24 #include <string.h>
25 #include <math.h>
26 #include "passert.h"
27 #include "portable.h"
28
29
30 #include "portable.h"
31
32 #include "hmm_desc.h"
33 #include "utteranc.h"
34 #include "hmmlib.h"
35
36 #include "srec_sizes.h"
37 #include "search_network.h"
38 #include "srec.h"
39 #include "word_lattice.h"
40 #include "astar.h"
41 #include "srec_stats.h"
42 #include "srec_results.h"
43
44 #define PRINT_WORD_LATTICE 0
45 #define PRINT_SEARCH_DETAILS 0
46
47 #define TRUE_KILL_WTOKEN(WT) { WT.cost = MAXcostdata; \
48 WT.word = MAXwordID; \
49 WT.end_time = MAXframeID; \
50 WT.end_node = MAXnodeID; \
51 WT.backtrace = MAXwtokenID; \
52 }
53
allocate_word_lattice(frameID max_frames)54 srec_word_lattice *allocate_word_lattice(frameID max_frames)
55 {
56 srec_word_lattice *wl;
57
58 wl = (srec_word_lattice*) CALLOC_CLR(1, sizeof(srec_word_lattice), "search.word_lattice.base");
59 wl->max_frames = max_frames;
60 wl->words_for_frame = (wtokenID*) CALLOC_CLR(max_frames, sizeof(wtokenID), "search.word_lattice.words");
61
62 wl->whether_sorted = (asr_int16_t*)CALLOC_CLR(max_frames, sizeof(asr_int16_t), "search.word_lattice.sflag");
63
64 return wl;
65 }
66
destroy_word_lattice(srec_word_lattice * wl)67 void destroy_word_lattice(srec_word_lattice* wl)
68 {
69 FREE(wl->words_for_frame);
70 FREE(wl->whether_sorted);
71 FREE(wl);
72 }
73
initialize_word_lattice(srec_word_lattice * wl)74 void initialize_word_lattice(srec_word_lattice* wl)
75 {
76 frameID ifr;
77 for (ifr = 0; ifr < wl->max_frames; ifr++)
78 {
79 wl->words_for_frame[ifr] = MAXwtokenID;
80 wl->whether_sorted[ifr] = 0;
81 }
82 }
83
lattice_best_cost_to_frame(srec_word_lattice * wl,word_token * word_token_array,frameID ifr)84 costdata lattice_best_cost_to_frame(srec_word_lattice *wl, word_token* word_token_array, frameID ifr)
85 {
86 int sanity_counter = 0;
87 costdata best_cost = MAXcostdata;
88 wtokenID wtoken_index = wl->words_for_frame[ ifr];
89 while (wtoken_index != MAXwtokenID)
90 {
91 word_token* wtoken = &word_token_array[wtoken_index];
92 if (sanity_counter++ > 200) return MAXcostdata;
93 wtoken_index = wtoken->next_token_index;
94 if (best_cost > wtoken->cost)
95 best_cost = wtoken->cost;
96 }
97 return best_cost;
98 }
99
lattice_add_word_tokens(srec_word_lattice * wl,frameID frame,wtokenID word_token_list_head)100 void lattice_add_word_tokens(srec_word_lattice *wl, frameID frame,
101 wtokenID word_token_list_head)
102 {
103 if (frame >= wl->max_frames)
104 {
105 log_report("lattice_add_word_tokens: max_frame not big enough\n");
106 ASSERT(0);
107 }
108 wl->words_for_frame[frame] = word_token_list_head;
109 }
110
print_word_token_backtrace(srec * rec,wtokenID wtoken_index,char * tail)111 void print_word_token_backtrace(srec* rec, wtokenID wtoken_index, char* tail)
112 {
113 char* null = "NULL", *p;
114 char iwttime[8] = { 0, 0, 0, 0, 0, 0, 0, 0};
115 bigcostdata cost;
116 bigcostdata cost_for_word;
117 word_token *wtoken, *last_wtoken;
118
119 last_wtoken = NULL;
120 while (wtoken_index != MAXwtokenID)
121 {
122 wtoken = &rec->word_token_array[wtoken_index];
123 if (wtoken->word < rec->context->olabels->num_words)
124 p = rec->context->olabels->words[wtoken->word];
125 else
126 p = null;
127 ASSERT(!last_wtoken || last_wtoken->end_time > wtoken->end_time);
128 ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0);
129 cost = wtoken->cost + rec->accumulated_cost_offset[ wtoken->end_time];
130
131 if (wtoken->backtrace != MAXwtokenID)
132 {
133 word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace];
134 cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[ next_wtoken->end_time];
135 }
136 else
137 {
138 cost_for_word = cost;
139 }
140 sprintf(iwttime, "/%d", WORD_TOKEN_GET_WD_ETIME(wtoken) );
141 PLogMessage (" (%d W%d %s cost=%d/%d/%d time=%d%s node=%d)", wtoken_index, wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, iwttime, wtoken->end_node);
142 fflush(stdout);
143 ASSERT(wtoken->backtrace != wtoken_index);
144 wtoken_index = wtoken->backtrace;
145 last_wtoken = wtoken;
146 }
147 PLogMessage (tail);
148 }
149
sprint_bword_token_backtrace(char * buf,int buflen,srec * rec,wtokenID wtoken_index)150 int sprint_bword_token_backtrace(char* buf, int buflen, srec* rec, wtokenID wtoken_index)
151 {
152 char* null = "NULL", *p;
153 char *pbuf = buf;
154 *pbuf = 0;
155
156 while (wtoken_index != MAXwtokenID)
157 {
158 word_token* wtoken = &rec->word_token_array[wtoken_index];
159 p = null;
160 if (wtoken->word < rec->context->olabels->num_words)
161 p = rec->context->olabels->words[wtoken->word];
162 ASSERT(pbuf + strlen(p) + 1 < buf + buflen);
163 pbuf += sprintf(pbuf, "%s ", p);
164 ASSERT(wtoken->backtrace != wtoken_index);
165
166 wtoken_index = wtoken->backtrace;
167 }
168 if (pbuf > buf && *(pbuf - 1) == ' ') *(pbuf - 1) = 0;
169 return 0;
170 }
171
172 #define ERROR_TRANSCRIPTION_TOO_LONG -1
173
sprint_word_token_backtraceByWordID(wordID * wordIDs,size_t * len,srec * rec,wtokenID wtoken_index)174 ESR_ReturnCode sprint_word_token_backtraceByWordID(wordID* wordIDs, size_t* len, srec* rec, wtokenID wtoken_index)
175 {
176 size_t i, currentLen = 0;
177 ESR_ReturnCode rc;
178 word_token* wtoken;
179
180 #if PRINT_SEARCH_DETAILS
181 printf("in get backtrace wtoken %d\n", wtoken_index);
182 #endif
183
184 while (wtoken_index != MAXwtokenID)
185 {
186 if (*len <= currentLen)
187 {
188 rc = ESR_BUFFER_OVERFLOW;
189 PLogError(ESR_rc2str(rc));
190 *len = currentLen + 1;
191 goto CLEANUP;
192 }
193 wtoken = &rec->word_token_array[wtoken_index];
194 wordIDs[currentLen] = wtoken->word;
195 ++currentLen;
196
197 if (wtoken_index == wtoken->backtrace)
198 {
199 *len = 0;
200 PLogError("Result is loopy, rejecting");
201 return ESR_INVALID_STATE;
202 }
203 wtoken_index = wtoken->backtrace;
204 }
205
206 /* reverse the order */
207 for (i = 0; i < currentLen / 2; i++)
208 {
209 wordID tmp = wordIDs[i];
210 wordIDs[i] = wordIDs[(currentLen-1-i)];
211 wordIDs[(currentLen-1-i)] = tmp;
212 }
213 /* strip the pau/pau2 markers */
214 if (currentLen >= 1 && wordIDs[0] == rec->context->beg_silence_word)
215 {
216 for (i = 0; i < currentLen - 1; i++)
217 wordIDs[i] = wordIDs[i+1];
218 currentLen--;
219 }
220 if (currentLen >= 1 && wordIDs[currentLen-1] == rec->context->end_silence_word)
221 currentLen--;
222 wordIDs[currentLen] = MAXwordID;
223 *len = currentLen;
224
225 return ESR_SUCCESS;
226 CLEANUP:
227 return rc;
228 }
229
sprint_word_token_backtrace(char * transcription,int len,srec * rec,wtokenID wtoken_index)230 int sprint_word_token_backtrace(char *transcription, int len, srec* rec, wtokenID wtoken_index)
231 {
232 char *w;
233 char *from_p;
234 char *to_p;
235 char *end;
236 char *tr_end = transcription;
237 int wlen;
238
239 #define SHOW_END_TIMES 1
240 #if SHOW_END_TIMES
241 char buf[256/*64*/];
242 #endif
243
244 *transcription = 0;
245
246 #if PRINT_SEARCH_DETAILS
247 printf("in get backtrace wtoken %d\n", wtoken_index);
248 #endif
249
250 while (wtoken_index != MAXwtokenID)
251 {
252 word_token* wtoken = &rec->word_token_array[wtoken_index];
253 #if PRINT_SEARCH_DETAILS
254 printf("got token %d word %d\n", wtoken_index, wtoken->word);
255 #endif
256
257 w = "NULL";
258 if (wtoken->word < rec->context->olabels->num_words)
259 w = rec->context->olabels->words[wtoken->word];
260 #if SHOW_END_TIMES
261 /* should be defined outside because it is used outside by w */
262 /* sprintf(buf,"%s@%d.%d",w, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_time); */
263 if (strlen(w) + 12 > sizeof(buf))
264 {
265 *transcription = 0;
266 return ERROR_TRANSCRIPTION_TOO_LONG;
267 }
268 else
269 {
270 sprintf(buf, "%s@%d", w, wtoken->end_time);
271 w = &buf[0];
272 }
273 #endif
274 wlen = strlen(w);
275 if (wlen + tr_end - transcription + 1 >= len)
276 {
277 *transcription = 0;
278 return ERROR_TRANSCRIPTION_TOO_LONG;
279 }
280 /*need to tack onto beginning, so move string over*/
281 from_p = tr_end;
282 to_p = tr_end + wlen + 1;
283 tr_end = to_p;
284 while (from_p >= transcription) *(to_p--) = *(from_p--);
285
286 /* add a space*/
287 *to_p = ' ';
288
289 /*add the new word*/
290 to_p = transcription;
291 end = to_p + wlen;
292
293 while (to_p < end) *(to_p++) = *(w++);
294
295 if (wtoken_index == wtoken->backtrace)
296 {
297 *transcription = 0;
298 #if BUILD&BUILD_DEBUG
299 printf("Error: result is loopy, rejecting\n");
300 #endif
301 return ERROR_RESULT_IS_LOOPY;
302 }
303 wtoken_index = wtoken->backtrace;
304 }
305 return 0;
306 }
307
print_word_token(srec * rec,wtokenID wtoken_index,char * msg)308 void print_word_token(srec* rec, wtokenID wtoken_index, char* msg)
309 {
310 bigcostdata cost, cost_for_word;
311 char *p = "NULL";
312 word_token* wtoken = &rec->word_token_array[wtoken_index];
313
314 PLogMessage ( msg );
315 if (wtoken->word < rec->context->olabels->num_words)
316 p = rec->context->olabels->words[wtoken->word];
317 ASSERT(rec->accumulated_cost_offset[ wtoken->end_time] != 0);
318 cost = wtoken->cost + rec->accumulated_cost_offset[wtoken->end_time];
319 if (wtoken->backtrace != MAXwtokenID)
320 {
321 word_token* next_wtoken = &rec->word_token_array[wtoken->backtrace];
322 cost_for_word = cost - next_wtoken->cost - rec->accumulated_cost_offset[next_wtoken->end_time];
323 }
324 else
325 {
326 cost_for_word = cost;
327 }
328 printf("wtoken %d W%i %s cost=%d/%d/%d time=%d/%d node=%d", wtoken_index,
329 wtoken->word, p, wtoken->cost, cost, cost_for_word, wtoken->end_time, WORD_TOKEN_GET_WD_ETIME(wtoken), wtoken->end_node);
330 pfflush(PSTDOUT);
331 print_word_token_backtrace(rec, wtoken->backtrace, "\n");
332 }
333
334
print_word_token_list(srec * rec,wtokenID wtoken_index,char * msg)335 void print_word_token_list(srec* rec, wtokenID wtoken_index, char* msg)
336 {
337 #ifndef NDEBUG
338 int sanity_counter = 0;
339 #endif
340 PLogMessage ( msg );
341 while (wtoken_index != MAXwtokenID)
342 {
343 word_token* wtoken = &rec->word_token_array[wtoken_index];
344 print_word_token(rec, wtoken_index, "");
345 ASSERT(sanity_counter++ < 200);
346 ASSERT(wtoken_index != wtoken->next_token_index);
347 wtoken_index = wtoken->next_token_index;
348 }
349 }
350
351 #define MAX_LEN 256
srec_get_result(srec * rec)352 void srec_get_result(srec *rec)
353 {
354 srec_word_lattice *wl;
355 frameID i;
356 wtokenID token_index;
357 word_token *wtoken;
358
359 #if PRINT_SEARCH_DETAILS
360 printf("in srec_get_result\n");
361 #endif
362
363 wl = rec->word_lattice;
364 #if PRINT_WORD_LATTICE
365 for (i = 0; i <= rec->current_search_frame; i++)
366 {
367 #else
368 for (i = rec->current_search_frame; i <= rec->current_search_frame; i++)
369 {
370 #endif
371
372 /* put the best choice at the top */
373 sort_word_lattice_at_frame(rec, i);
374 token_index = wl->words_for_frame[i];
375
376 #if PRINT_WORD_LATTICE
377 printf("----- List of words for frame %d\n", i);
378 print_word_token_list(rec, token_index, "");
379 #endif
380
381 if (i == rec->current_search_frame && token_index != MAXwtokenID)
382 {
383 wtoken = &(rec->word_token_array[token_index]);
384 print_word_token(rec, token_index, "Final Top Choice: ");
385 }
386 }
387 }
388
389 static srec* WHICH_RECOG(multi_srec* rec)
390 {
391 srec* return_rec = NULL;
392 costdata current_best_cost = MAXcostdata;
393 int i = 0;
394 #if DO_ALLOW_MULTIPLE_MODELS
395 for (i = 0; i < rec->num_activated_recs; i++)
396 {
397 #endif
398 if (current_best_cost > rec->rec[i].current_best_cost)
399 {
400 current_best_cost = rec->rec[i].current_best_cost;
401 return_rec = &rec->rec[i];
402 }
403 #if DO_ALLOW_MULTIPLE_MODELS
404 }
405 #endif
406 return return_rec;
407 }
408
409 ESR_ReturnCode srec_get_top_choice_wordIDs(multi_srec* recm, wordID* wordIDs, size_t* len)
410 {
411 srec* rec = WHICH_RECOG(recm);
412 frameID end_frame;
413 srec_word_lattice* wl;
414 wtokenID token_index;
415 ESR_ReturnCode rc;
416
417 if (!rec)
418 {
419 rc = ESR_INVALID_STATE;
420 PLogError(ESR_rc2str(rc));
421 goto CLEANUP;
422 }
423
424 end_frame = rec->current_search_frame;
425 wl = rec->word_lattice;
426 token_index = wl->words_for_frame[end_frame];
427
428 if (token_index == MAXwtokenID)
429 {
430 PLogError(L("ESR_INVALID_STATE"));
431 return ESR_INVALID_STATE;
432 }
433 #if PRINT_WORD_LATTICE
434 print_word_token_list(rec, token_index, "WORD TOKENS AT END\n");
435 #endif
436 /* the head of the list on the last frame is always best */
437 CHKLOG(rc, sprint_word_token_backtraceByWordID(wordIDs, len, rec, token_index));
438
439 return ESR_SUCCESS;
440 CLEANUP:
441 return rc;
442 }
443
444 int srec_get_top_choice_transcription(multi_srec* recm, char *transcription, int len, int whether_strip_slot_markers)
445 {
446 int rc;
447 srec* rec = WHICH_RECOG(recm);
448 frameID end_frame;
449 srec_word_lattice* wl;
450 wtokenID token_index;
451
452 if (!rec)
453 {
454 *transcription = 0;
455 return 1;
456 }
457 if( recm->eos_status == VALID_SPEECH_NOT_YET_DETECTED)
458 {
459 *transcription = 0;
460 return 1;
461 }
462
463 end_frame = rec->current_search_frame;
464 wl = rec->word_lattice;
465 sort_word_lattice_at_frame(rec, end_frame);
466 token_index = wl->words_for_frame[end_frame];
467
468 if (token_index != MAXwtokenID)
469 {
470 #if PRINT_WORD_LATTICE
471 print_word_token_list(rec, token_index, "WORD TOKENS AT END\n");
472 #endif
473 /* the head of the list on the last frame is always best */
474 rc = sprint_word_token_backtrace(transcription, len, rec, token_index);
475 }
476 else
477 {
478 strcpy(transcription, "");
479 rc = 1;
480 }
481 if (whether_strip_slot_markers)
482 srec_result_strip_slot_markers(transcription);
483 return rc;
484 }
485
486 int srec_get_top_choice_score(multi_srec* recm, bigcostdata *cost, int do_incsil)
487 {
488 srec* rec = WHICH_RECOG(recm);
489 frameID end_frame;
490 srec_word_lattice* wl;
491 wtokenID token_index;
492 word_token* wtoken;
493
494 if (!rec)
495 {
496 *cost = MAXcostdata;
497 return 1;
498 }
499
500 end_frame = rec->current_search_frame;
501 wl = rec->word_lattice;
502 token_index = wl->words_for_frame[end_frame];
503
504 if (end_frame < MAXframeID && token_index != MAXwtokenID)
505 {
506 wtoken = &rec->word_token_array[token_index];
507 *cost = wtoken->cost;
508 *cost += rec->accumulated_cost_offset[ wtoken->end_time];
509 return 0;
510 }
511 else
512 {
513 *cost = MAXcostdata;
514 return 1;
515 }
516 }
517
518 int srec_print_results(multi_srec *recm, int max_choices)
519 {
520 char transcription[MAX_LEN];
521 bigcostdata cost;
522
523 srec_get_top_choice_transcription(recm, transcription, MAX_LEN, 1);
524 srec_get_top_choice_score(recm, &cost, SCOREMODE_INCLUDE_SILENCE);
525
526 log_report("R: %8ld %8ld %s\t%.1f\n", 0, 0, transcription, cost);
527
528 return 0;
529 }
530
531 /* sort the word lattice at this frame, todo: remove rec argument */
532
533 #define MAX_WTOKENS_AT_FRAME 64 /* +1 for the MAXwtokenID! */
534 void sort_word_lattice_at_frame(srec* rec, frameID frame)
535 {
536 srec_word_lattice* wl = rec->word_lattice;
537 word_token *wtoken, *wtoken2;
538 wtokenID pwi[MAX_WTOKENS_AT_FRAME], token_index;
539 word_token* word_token_array = rec->word_token_array;
540 int i, j, npwi = 0;
541 ASSERT(rec->word_priority_q->max_in_q < MAX_WTOKENS_AT_FRAME);
542
543 ASSERT(frame < wl->max_frames);
544 if (wl->whether_sorted[frame])
545 return;
546
547 wl->whether_sorted[frame] = 1;
548
549 /* make an array of word token index addresses */
550 for (pwi[npwi] = wl->words_for_frame[frame]; pwi[npwi] != MAXwtokenID;)
551 {
552 ASSERT(npwi < MAX_WTOKENS_AT_FRAME);
553 token_index = pwi[npwi];
554 wtoken = &word_token_array[ token_index];
555 npwi++;
556 pwi[npwi] = wtoken->next_token_index;
557 }
558
559 /* sort the word token indices, bubble sort is fine */
560 for (i = 0; i < npwi; i++)
561 {
562 for (j = 0; j < (npwi - 1); j++)
563 {
564 wtoken = &word_token_array[ pwi[j]];
565 wtoken2 = &word_token_array[ pwi[j+1]];
566 if (wtoken->cost > wtoken2->cost)
567 {
568 token_index = pwi[j];
569 pwi[j] = pwi[j+1];
570 pwi[j+1] = token_index;
571 }
572 }
573 }
574
575 /*print_word_token_list(rec,wl->words_for_frame[frame],"## BEFORE SORT\n");*/
576 wl->words_for_frame[ frame] = pwi[0];
577 for (i = 0; i < npwi; i++)
578 {
579 wtoken = &word_token_array[ pwi[i]];
580 wtoken->next_token_index = pwi[i+1]; /* last points nowhwere */
581 }
582 /*print_word_token_list(rec,wl->words_for_frame[frame],"## AFTER SORT\n");*/
583 }
584
585
586 /* this frees a word token, it may still have references in the lattice though */
587
588 void free_word_token(srec *rec, wtokenID old_token_index)
589 {
590 word_token* wtoken;
591 wtoken = &rec->word_token_array[old_token_index];
592 wtoken->next_token_index = rec->word_token_freelist;
593 rec->word_token_freelist = old_token_index;
594 TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]);
595 }
596
597 /* this frees some earlier allocated word_tokens from previous frames,
598 this makes sure we can always have some to spare for future frames */
599
600 void free_word_token_from_lattice(srec *rec, wtokenID old_token_index)
601 {
602 word_token* wtoken;
603 wtokenID *rtoken_index;
604 word_token* rtoken;
605
606 #define CHECK_FREE_WORD_TOKEN 1
607 #if CHECK_FREE_WORD_TOKEN
608 stokenID stoken_index, i;
609 ftokenID ftoken_index;
610 fsmarc_token* stoken;
611 fsmnode_token* ftoken;
612 int nerrs = 0;
613
614 stoken_index = rec->active_fsmarc_tokens;
615 for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index)
616 {
617 stoken = &rec->fsmarc_token_array[stoken_index];
618 for (i = 0; i < stoken->num_hmm_states; i++)
619 {
620 if (stoken->word_backtrace[i] == old_token_index)
621 {
622 printf("Error: can't delete wtoken %d cuz stoken%d.%d cost %d\n",
623 old_token_index, stoken_index, i, stoken->cost[i]);
624 nerrs++;
625 }
626 }
627 }
628
629 ftoken_index = rec->active_fsmnode_tokens;
630 for (; ftoken_index != MAXftokenID; ftoken_index = ftoken->next_token_index)
631 {
632 ftoken = &rec->fsmnode_token_array[ftoken_index];
633 if (ftoken->word_backtrace == old_token_index)
634 {
635 printf("Error: can't delete wtoken %d cuz ftoken %d cost %d\n",
636 old_token_index, ftoken_index, ftoken->cost);
637 nerrs++;
638 }
639 }
640
641 /* wtoken = &rec->word_token_array[old_token_index];
642 for(ifr=wtoken->end_time+1; ifr>=0; ifr--) {
643 wtoken_index = rec->word_lattice->words_for_frame[ifr];
644 for( ; wtoken_index!= MAXwtokenID; wtoken_index=wtoken->next_token_index) {
645 wtoken = &rec->word_token_array[wtoken_index];
646 if(wtoken->backtrace == old_token_index) {
647 printf("Error: can't delete wtoken %d cuz wtoken %d at frame %d backtraces cost %d\n",
648 old_token_index, wtoken_index, ifr, wtoken->cost);
649 nerrs++;
650 }
651 }
652 }
653 */
654 ASSERT(nerrs == 0);
655 if (nerrs > 0)
656 {
657 print_word_token(rec, old_token_index, "Error: while deleting ");
658 return;
659 }
660 #endif
661
662 wtoken = &rec->word_token_array[old_token_index];
663 /* remove from word lattice */
664 rtoken_index = &rec->word_lattice->words_for_frame[ wtoken->end_time+1];
665 for (; (*rtoken_index) != MAXwtokenID; rtoken_index = &rtoken->next_token_index)
666 {
667 rtoken = &rec->word_token_array[(*rtoken_index)];
668 if (*rtoken_index == old_token_index)
669 {
670 *rtoken_index = wtoken->next_token_index;
671 break;
672 }
673 }
674 wtoken->next_token_index = rec->word_token_freelist;
675 rec->word_token_freelist = old_token_index;
676 TRUE_KILL_WTOKEN(rec->word_token_array[rec->word_token_freelist]);
677 }
678
679 int reprune_word_tokens(srec* rec, costdata current_best_cost)
680 {
681 int i, keep_astar_prune;
682 arc_token* keep_arc_token_list;
683
684 stokenID stoken_index;
685 fsmarc_token* stoken;
686 wtokenID btindex;
687 word_token* bttoken;
688 wtokenID wtoken_index;
689 word_token* wtoken;
690 altword_token* awtoken;
691
692 /* remember things about the astar before changing it for local purposes */
693 keep_astar_prune = rec->astar_stack->prune_delta;
694 /* rec->astar_stack->prune_delta = 400; */
695 /* ignore the grammar constraints for this quick astar backward pass */
696 keep_arc_token_list = rec->context->arc_token_list;
697 rec->context->arc_token_list = 0;
698
699 /* we will flag all wtokens to be kept */
700
701 /* initialize the flags to keep all */
702 memset(rec->word_token_array_flags, 0, sizeof(rec->word_token_array_flags[0])*rec->word_token_array_size);
703
704 /* flag all those tokens not active, ie already free */
705 wtoken_index = rec->word_token_freelist;
706 for (; wtoken_index != MAXwtokenID; wtoken_index = wtoken->next_token_index)
707 {
708 wtoken = &rec->word_token_array[wtoken_index];
709 rec->word_token_array_flags[wtoken_index]--; /* already deleted */
710 }
711
712 /* flag along the best active state paths */
713 stoken_index = rec->active_fsmarc_tokens;
714 for (; stoken_index != MAXstokenID; stoken_index = stoken->next_token_index)
715 {
716 stoken = &rec->fsmarc_token_array[ stoken_index];
717 for (i = 0; i < stoken->num_hmm_states; i++)
718 {
719 btindex = stoken->word_backtrace[i];
720 for (; btindex != MAXwtokenID; btindex = bttoken->backtrace)
721 {
722 bttoken = &rec->word_token_array[ btindex];
723 ASSERT(rec->word_token_array_flags[ btindex] >= 0);
724 rec->word_token_array_flags[ btindex] = 1;
725 }
726 for (awtoken = stoken->aword_backtrace[i]; awtoken;
727 awtoken = awtoken->next_token)
728 {
729 btindex = awtoken->word_backtrace;
730 for (; btindex != MAXwtokenID; btindex = bttoken->backtrace)
731 {
732 bttoken = &rec->word_token_array[ btindex];
733 rec->word_token_array_flags[ btindex] = 1;
734 }
735 }
736 }
737 }
738
739 /* run the astar and flag a little more */
740 astar_stack_prepare_from_active_search(rec->astar_stack, 100, rec);
741 astar_stack_do_backwards_search(rec, 100);
742 astar_stack_flag_word_tokens_used(rec->astar_stack, rec);
743 astar_stack_clear(rec->astar_stack);
744
745 /* kill_word_tokens */
746 for (i = 0; i < rec->word_token_array_size; i++)
747 {
748 if (rec->word_token_array_flags[i] == 0) /* < 0 are already free! */
749 free_word_token_from_lattice(rec, (frameID)i);
750 }
751
752 /* set this back to a regular astar from remembered values */
753 rec->context->arc_token_list = keep_arc_token_list;
754 rec->astar_stack->prune_delta = (costdata) keep_astar_prune;
755
756 SREC_STATS_INC_WTOKEN_REPRUNES(1);
757 return 0;
758 }
759
760