• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  grxmlcompile.cpp  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include "ptypes.h"
21 
22 #include <unordered_map>
23 
24 #include "fst/lib/fst.h"
25 #include "fst/lib/fstlib.h"
26 #include "fst/lib/arc.h"
27 #include "fst/lib/fst-decl.h"
28 #include "fst/lib/vector-fst.h"
29 #include "fst/lib/arcsort.h"
30 #include "fst/lib/invert.h"
31 
32 #include "fst-io.h"
33 
34 #include "ESR_Locale.h"
35 #include "LCHAR.h"
36 #include "pstdio.h"
37 #include "PFileSystem.h"
38 #include "PANSIFileSystem.h"
39 #include "plog.h"
40 #include "pmemory.h"
41 #include "ESR_Session.h"
42 #include "SR_Session.h"
43 #include "SR_Vocabulary.h"
44 #include "srec_arb.h"       // for EPSILON_LABEL etc
45 #include <fstream>
46 #include <iostream>
47 #include "tinyxml.h"
48 #include "grxmldoc.h"
49 
50 #ifdef MEMTRACE
51 #include <mcheck.h>
52 #endif
53 
54 #define OPENFST_ACKNOWLEDGEMENT	\
55 	"This tool uses the OpenFst library. \n" \
56  "Licensed under the Apache License, Version 2.0 (the \"License\");\n" \
57 " you may not use this file except in compliance with the License.\n" \
58 " You may obtain a copy of the License at" \
59 "\n" \
60 "      http://www.apache.org/licenses/LICENSE-2.0\n" \
61 "\n" \
62 " Unless required by applicable law or agreed to in writing, software\n" \
63 " distributed under the License is distributed on an \"AS IS\" BASIS,\n" \
64 " WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \
65 " See the License for the specific language governing permissions and\n" \
66 " limitations under the License.\n" \
67 "\n" \
68 " This library was developed at Google Research (M. Riley, J. Schalkwyk, W. Skut) and NYU's Courant Institute (C. Allauzen, M. Mohri). It is intended to be comprehensive, flexible, efficient and scale well to large problems. It is an open source project distributed under the Apache license. \n"
69 
70 
71 #define TINYXML_ACKNOWLEDGEMENT	\
72 	"This tool uses the tinyxml library. \n" \
73 "Copyright (c) 2007 Project Admins: leethomason \n" \
74 "The TinyXML software is provided 'as-is', without any express or implied\n" \
75 "warranty. In no event will the authors be held liable for any damages\n" \
76 "arising from the use of this software.\n" \
77 "\n" \
78 "Permission is granted to anyone to use this software for any purpose,\n" \
79 "including commercial applications, and to alter it and redistribute it\n" \
80 "freely, subject to the following restrictions:\n"
81 
82 #define NUANCE_COPYRIGHT \
83 "// grxmlcompile\n" \
84 "//\n" \
85 "// Licensed under the Apache License, Version 2.0 (the \"License\");\n" \
86 "// you may not use this file except in compliance with the License.\n" \
87 "// You may obtain a copy of the License at\n" \
88 "//\n" \
89 "//      http://www.apache.org/licenses/LICENSE-2.0\n" \
90 "//\n" \
91 "// Unless required by applicable law or agreed to in writing, software\n" \
92 "// distributed under the License is distributed on an \"AS IS\" BASIS,\n" \
93 "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \
94 "// See the License for the specific language governing permissions and\n" \
95 "// limitations under the License.\n" \
96 "//\n" \
97 "// This program compiles a .grxml grammar into the graphs needed for \n" \
98 "// decoding with SREC\n" \
99 "// \n"
100 
101 #define MAX_LINE_LENGTH   256
102 #define MAX_PATH_NAME 512
103 #define MAX_PRONS_LENGTH 1024
104 #define SILENCE_PREFIX_WORD "-pau-"
105 #define SILENCE_SUFFIX_WORD "-pau2-"
106 #define SLOT_SUFFIX "__"
107 #define SLOT_PREFIX "__"
108 #define MAX_NUM_SLOTS 12 /* must agree with srec_context.h */
109 #define EXTRA_EPSILON_LABEL 39999 // must be higher than the number of models
110 #define DEFAULT_WB_COST 40
111 #define DEFAULT_WB_COST_STR "40"
112 #define SLOT_COUNTER_OFFSET 30000 // must be higher than the number of models
113 #define NOISE_PHONEME_CODE 'J'
114 
115 static int debug = 0;
116 static int verbose = 0;
117 
118 using namespace std;
119 
120 ESR_ReturnCode make_openfst_graphs(GRXMLDoc* pDoc, /* for metas */
121 				   const std::string& grxmlBasename,
122 				   const char* vocabFilename,
123 				   const char* cfstFilename,
124 				   const char* modelmapFilename);
125 
/*
 * Returns the text of line number line_num (0-based) of the file fn.
 *
 * The returned pointer refers to a function-local static buffer that is
 * overwritten by the next call, so the result must be consumed before calling
 * again (not reentrant).
 *
 * An empty string is returned when fn is NULL, the file cannot be opened,
 * the file has fewer than line_num+1 lines, or a line too long for the
 * buffer stops the scan before the requested line is reached.
 */
const char* showline(const char* fn, int line_num)
{
  static char line[8096] = { 0 };

  line[0] = 0;  // never hand back text left over from an earlier call
  if (!fn)
    return &line[0];

  std::ifstream strm(fn);
  for (int line_count = 0; strm && strm.getline(line, sizeof(line)); line_count++) {
    if (line_count == line_num)
      return &line[0];
  }

  // requested line was not reached; do not return whatever was read last
  line[0] = 0;
  return &line[0];
}
135 
/*
 * Returns the part of `full` that follows the last '/' character.
 * When `full` contains no '/', it is returned unchanged; when it ends
 * with '/', the result is the empty string.
 */
std::string ExtractFileName(const std::string& full)
{
  const std::string::size_type slash = full.rfind('/');
  return (slash == std::string::npos) ? full : full.substr(slash + 1);
}
145 
146 /*-----------------------------------------------------------------------*
147  *                                                                       *
148  *                                                                       *
149  *-----------------------------------------------------------------------*/
150 
usage_error(const char * prgname)151 int usage_error(const char* prgname)
152 {
153   printf("USAGE: -par <par file> -grxml <grxml grammar file> -vocab <dictionary file (.ok)> [-outdir <output directory>]\n");
154   return (int)ESR_INVALID_ARGUMENT;
155 }
156 
main(int argc,char * argv[])157 int main(int argc, char* argv[])
158 {
159   ESR_ReturnCode status = ESR_SUCCESS;
160   char *parfile = NULL;
161   char *grxmlfile = NULL;
162   char *cmdline_vocfile = NULL;
163   std::string outdir("."); // default output dir is current directory
164   /* for now, assume char and LCHAR are the same, else fail to compile! */
165   { char zzz[ 1 - (sizeof(LCHAR)!=sizeof(char))]; zzz[0] = 0; }
166 
167 #ifdef MEMTRACE
168     mtrace();
169 #endif
170 
171 #if defined(GRXMLCOMPILE_PRINT_ACKNOWLEDGEMENT)
172     cout << OPENFST_ACKNOWLEDGEMENT <<std::endl;
173     cout << TINYXML_ACKNOWLEDGEMENT <<std::endl;
174     cout << NUANCE_COPYRIGHT <<std::endl;
175 #endif
176 
177     // Process all XML files given on command line
178 
179     if(argc<5){
180       return usage_error(argv[0]);
181     }
182 
183     for(int i=1;i<argc;i++)
184     {
185       if(!strcmp(argv[i],"-grxml"))
186         grxmlfile = argv[++i];
187       else if(!strcmp(argv[i],"-debug"))
188         debug++;
189       else if(!strcmp(argv[i],"-verbose"))
190         verbose++;
191       else if(!strcmp(argv[i],"-par") || !strcmp(argv[i],"-parfile"))
192         parfile = argv[++i];
193       else if(!strcmp(argv[i],"-vocab"))
194         cmdline_vocfile = argv[++i];
195       else if(!strcmp(argv[i],"-outdir"))
196         outdir = std::string(argv[++i]);
197       else {
198         printf("error_usage: argument [%s]\n", argv[i]);
199 	return usage_error(argv[0]);
200 	return (int)ESR_INVALID_ARGUMENT;
201       }
202     }
203 
204     //process_xml( std::string(grxmlfile), parfile );
205     std::string filename = std::string(grxmlfile);
206 
207     /***************************
208             process xml
209     ***************************/
210 
211     cout << "processing [" << filename << "] ..." << endl;
212 
213     TiXmlDocument node;
214     bool bLoadedOK = node.LoadFile( filename.c_str() );
215     if(!bLoadedOK || node.Error()) {
216       std::cout << "Error: while creating TiXmlDocument from " << filename << std::endl;
217       std::cout << "Error: " << node.Error() << " id " << node.ErrorId() << " row " << node.ErrorRow() << " col " << node.ErrorCol() << std::endl;
218       std::cout << "Error: " << node.ErrorDesc() <<  std::endl;
219       std::cout << "Error: near " << showline( filename.c_str(), node.ErrorRow()) << std::endl;
220       return (int)ESR_INVALID_ARGUMENT;
221     }
222 
223 
224     // *************************************************
225     //	Parse the file into a DOM object and create word graph
226     //
227     GRXMLDoc *doc = new (GRXMLDoc);
228     std::string filenameNoPath = ExtractFileName(filename);
229     doc->parseGrammar( node, filenameNoPath );   // THE PARSING AND NETWORK BUILD HAPPENS IN HERE
230     /************************
231       end of xml processing
232     ************************/
233 
234     // Create grammar network files. Use prefix of input file for output.
235     std::string s = filename;
236     std::string grxmlbase = outdir + "/" + ExtractFileName(grxmlfile);
237     size_t p1 = grxmlbase.find_last_of(".");
238     if ( p1 != string::npos )
239       grxmlbase.assign( grxmlbase, 0, p1);
240 
241     std::string newName;
242     newName = grxmlbase + ".map";
243     doc->writeMapFile( newName );
244     newName = grxmlbase + ".script";
245     doc->writeScriptFile( newName );
246 
247     doc->writeGraphFiles( grxmlbase, false );
248 
249     //
250     // SR initialization
251     //
252     char vocfile[MAX_PATH_NAME];
253     char cfstfile[MAX_PATH_NAME];
254     char modelmapfile[MAX_PATH_NAME];
255     size_t len;
256 
257     PMemInit();
258     printf("info: Using parfile %s\n",parfile);
259     status = SR_SessionCreate((const LCHAR*) parfile);
260     // status = SR_SessionCreate ( parfile );
261     if (  status != ESR_SUCCESS ) {
262       LPRINTF("Error: SR_SessionCreate(%s) %s\n", parfile, ESR_rc2str(status));
263       return (int)status;
264     }
265 
266     // vocfile
267     if(cmdline_vocfile) {
268       strcpy( vocfile, cmdline_vocfile);
269     } else {
270       len = MAX_PATH_NAME;
271       ESR_SessionGetLCHAR ( L("cmdline.vocabulary"), (LCHAR*)vocfile, &len );
272       // skip PrefixWithBaseDirectory(), 'tis done inside SR_VocabularyLoad()
273     }
274     printf("info: Using dictionary %s\n",vocfile);
275 
276     // modelmapfile
277     len = MAX_PATH_NAME;
278     ESR_SessionGetLCHAR ( L("cmdline.arbfile"), (LCHAR*)modelmapfile, &len);
279     len = MAX_PATH_NAME;
280     status = ESR_SessionPrefixWithBaseDirectory ( (LCHAR*)modelmapfile, &len);
281     char* p = strrchr(modelmapfile,'/');
282     if(!p) p = strrchr(modelmapfile,'\\');
283     if(p) strcpy(p, "/models128x.map");
284 
285     // cfstfile
286     len = MAX_PATH_NAME;
287     ESR_SessionGetLCHAR ( L("cmdline.arbfile"), (LCHAR*)cfstfile, &len);
288     len = MAX_PATH_NAME;
289     status = ESR_SessionPrefixWithBaseDirectory ( (LCHAR*)cfstfile, &len);
290     p = strrchr(cfstfile,'/');
291     if(!p) p = strrchr(cfstfile,'\\');
292     if(p) strcpy(p, "/generic.C");
293 
294     status = make_openfst_graphs( doc, grxmlbase, (const char*)vocfile, (const char*)cfstfile, (const char*)modelmapfile);
295     if(status != ESR_SUCCESS) {
296       LPRINTF("Error: make_openfst_graphs() returned %s\n",  ESR_rc2str(status));
297     } else {
298       /* make_openfst_graphs() can sometimes call doc->setMeta() to put
299 	 Session parameters into the .params file, so writeParamsFile()
300 	 should be called after make_openfst_graphs() */
301       newName = grxmlbase + ".params";
302       doc->writeParamsFile( newName );
303     }
304 
305     //
306     // SR de-initialization
307     //
308     SR_SessionDestroy();
309     PMemShutdown();
310 
311     delete doc;
312     return (int)status;
313 }
314 
315 /*-----------------------------------------------------------------*
316  * utils                                                           *
317  *-----------------------------------------------------------------*/
318 
is_slot_symbol(const char * sym)319 bool is_slot_symbol( const char* sym)
320 {
321   const char* p = strstr(sym,SLOT_PREFIX);
322   int len = strlen(sym);
323   if(len>4 && !strcmp(sym+len-2,SLOT_SUFFIX) && (p-sym)<len-2) {
324     return true;
325   } else
326     return false;
327 }
328 
StrToId(const char * s,fst::SymbolTable * syms,const char * name)329 int64 StrToId(const char *s, fst::SymbolTable *syms,
330 	      const char *name)
331 {
332   int64 n;
333   if (syms) {
334     n = syms->Find(s);
335     if (n < 0) {
336       cerr << "FstReader: Symbol \"" << s
337 	   << "\" is not mapped to any integer " << name
338 	   << ", symbol table = " << syms->Name();
339     }
340   } else {
341     char *p;
342     n = strtoll(s, &p, 10);
343     if (p < s + strlen(s) || n < 0) {
344       cerr << "FstReader: Bad " << name << " integer = \"" << s;
345     }
346   }
347   return n;
348 }
349 
350 /* FstMergeOLabelsToILabels, FstSplitOLabelsFromILabels
351    are used to make sure the minimization does not go overboard in pushing
352    output labels toward the beginning of the graph.  When that happens
353    then the speech recognition decoder fails! */
354 
FstMergeOLabelsToILabels(fst::StdVectorFst & fst_,int max_ilabels)355 ESR_ReturnCode FstMergeOLabelsToILabels( fst::StdVectorFst& fst_, int max_ilabels )
356 {
357   fst::StdArc::StateId s = fst_.Start();
358   if (s == fst::kNoStateId)
359     return ESR_INVALID_ARGUMENT;
360   for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
361        !siter.Done(); siter.Next()) {
362     s = siter.Value();
363 
364     for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
365 	!aiter.Done(); aiter.Next()) {
366       fst::StdArc arc = aiter.Value();
367       if( arc.ilabel >= max_ilabels ||
368 	  (float)arc.ilabel + ((float)max_ilabels)*arc.olabel > INT_MAX) {
369 	std::cout << "Error: internal error in FstMergeOLabelsToILabels() " << std::endl;
370 	return ESR_NOT_IMPLEMENTED;
371       }
372       arc.ilabel = arc.ilabel + max_ilabels * arc.olabel;
373       arc.olabel = 0;
374       aiter.SetValue( arc);
375     }
376   }
377   return ESR_SUCCESS;
378 }
379 
FstMergeOLabelsToILabels_GetMax(fst::StdVectorFst & fst_,int & max_ilabel)380 ESR_ReturnCode FstMergeOLabelsToILabels_GetMax( fst::StdVectorFst& fst_, int& max_ilabel )
381 {
382   if (fst_.Start() == fst::kNoStateId) return ESR_INVALID_ARGUMENT;
383   for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
384        !siter.Done(); siter.Next()) {
385     for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, siter.Value());
386 	!aiter.Done(); aiter.Next()) {
387       if( aiter.Value().ilabel > max_ilabel)
388 	max_ilabel = aiter.Value().ilabel;
389     }
390   }
391   max_ilabel++;
392   return ESR_SUCCESS;
393 }
394 
FstSplitOLabelsFromILabels(fst::StdVectorFst & fst_,int max_ilabels)395 ESR_ReturnCode FstSplitOLabelsFromILabels( fst::StdVectorFst& fst_, int max_ilabels )
396 {
397   fst::StdArc::StateId s = fst_.Start();
398   if (s == fst::kNoStateId)
399     return ESR_INVALID_ARGUMENT;
400   for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
401        !siter.Done(); siter.Next()) {
402     s = siter.Value();
403 
404     for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
405 	!aiter.Done(); aiter.Next()) {
406       fst::StdArc arc = aiter.Value();
407       arc.olabel = arc.ilabel / max_ilabels;
408       arc.ilabel = arc.ilabel - arc.olabel*max_ilabels;
409       aiter.SetValue( arc);
410     }
411   }
412   return ESR_SUCCESS;
413 }
414 
415 /* this is to replace the "fake" extra epsilon input labels, which were
416    put there to disambiguate homonyms */
417 
FstReplaceILabel(fst::StdVectorFst & fst_,int from_ilabel,int into_ilabel)418 ESR_ReturnCode FstReplaceILabel( fst::StdVectorFst& fst_, int from_ilabel, int into_ilabel)
419 {
420   fst::StdArc::StateId s = fst_.Start();
421   if (s == fst::kNoStateId)
422     return ESR_INVALID_ARGUMENT;
423   for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
424        !siter.Done(); siter.Next()) {
425     s = siter.Value();
426 
427     for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
428 	!aiter.Done(); aiter.Next()) {
429       fst::StdArc arc = aiter.Value();
430       if(arc.ilabel == from_ilabel) {
431 	arc.ilabel = into_ilabel;
432 	aiter.SetValue( arc);
433       }
434     }
435   }
436   return ESR_SUCCESS;
437 }
438 
439 /* this pushes the slot labels forward which gives an opportunity for
440    multiple instances of the slot to be merged, eg. lookup NAME
441    vs lookup contact NAME .. if in separate rules, then they will
442    merge thanks to using 3 arcs for the NAME */
443 
FstPushSlotLikeOLabels(fst::StdVectorFst & fst_,int myMin,int myMax)444 ESR_ReturnCode FstPushSlotLikeOLabels( fst::StdVectorFst& fst_, int myMin, int myMax)
445 {
446   int i;
447   ESR_ReturnCode rc = ESR_SUCCESS;
448   char done_for_state[2*65536]; // hope this is enough!
449   memset( &done_for_state[0], 0, sizeof(done_for_state));
450 
451   fst::StdArc::StateId s = fst_.Start();
452   for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
453        !siter.Done(); siter.Next()) {
454     s = siter.Value();
455 
456     if(done_for_state[ s]) continue;
457     done_for_state[ s]++;
458 
459     for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
460 	!aiter.Done(); aiter.Next()) {
461       fst::StdArc arc = aiter.Value();
462       if(arc.olabel >= myMin && arc.olabel < myMax) {
463 	fst::StdArc::StateId s2 = arc.nextstate;
464 	int slotId = arc.olabel;
465 
466 	if(verbose)
467 	  std::cout << "info: FstPushSlotLikeOLabels() at state " << s << " arc ilabel " << arc.ilabel << " olabel " << arc.olabel << std::endl;
468 
469 	arc.ilabel = EPSILON_LABEL;
470 	arc.olabel = EPSILON_LABEL;
471 	arc.weight = 0; // zero weight
472 	aiter.SetValue( arc);
473 	done_for_state[ s2]++;
474 	for(fst::MutableArcIterator<fst::StdVectorFst> aiter2(&fst_, s2);
475 	    !aiter2.Done(); aiter2.Next()) {
476 	  fst::StdArc arc2 = aiter2.Value();
477 	  if(arc2.ilabel == WORD_BOUNDARY) {
478 	    std::cout << "Error: FstPushSlotLikeOLabels() failing, there could be confusion between the slot (hack-pron) and a real-pron, the slot olabel may have been pushed by earlier fst operations!" << std::endl;
479 	    rc = ESR_INVALID_STATE;
480 	  } else
481 	    arc2.ilabel = EPSILON_LABEL;
482 	  arc2.olabel = slotId;
483 	  aiter2.SetValue( arc2);
484 	}
485       }
486     }
487   }
488 
489   /* check */
490   int *num_pclg_arcs_using_slot = new int[myMax];
491   for(i=0;i<myMax;i++) num_pclg_arcs_using_slot[i] = 0;
492   for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
493        !siter.Done(); siter.Next()) {
494     s = siter.Value();
495 
496     for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
497 	!aiter.Done(); aiter.Next()) {
498       fst::StdArc arc = aiter.Value();
499       if(arc.olabel >= myMin && arc.olabel < myMax)
500 	num_pclg_arcs_using_slot[arc.olabel]++;
501     }
502   }
503   for(i=0; i<myMax; i++) {
504     if(num_pclg_arcs_using_slot[i] > 1) {
505       std::cout << "Error: SREC will not support multiply referred slots." << std::endl;
506       std::cout << "Error: Consider re-working your grammar to merge the references into one rule" << std::endl;
507       std::cout << "Error: or use two different slots" << std::endl;
508       rc = ESR_NOT_SUPPORTED;
509     }
510   }
511   delete [] num_pclg_arcs_using_slot;
512 
513   return rc;
514 }
515 
516 /* gets the range of slot numbers, myMin inclusive, myMax is exclusive */
517 
get_slot_olabel_range(const fst::SymbolTable * syms,int * myMin,int * myMax)518 void get_slot_olabel_range( const fst::SymbolTable* syms, int* myMin, int* myMax)
519 {
520   // assumes slots are at the top of the symbol table
521   fst::SymbolTableIterator iter( *syms);
522   *myMin = *myMax = 0;
523   for(iter.Reset(); !iter.Done(); iter.Next() ) {
524     const char* sym = iter.Symbol();
525     if ( is_slot_symbol( sym)) {
526       if(! (*myMin)) *myMin = iter.Value();
527       *myMax = iter.Value()+1;
528     }
529   }
530 }
531 
532 /* SLOT_COUNTER_OFFSET
533    The cfst is used to turn phonemes into acoustic models, but we're using
534    special phonemes for the slots, and must here add those as pass through
535    in the Cfst, meaning that the slot marker must be unchanged after
536    composition.  To do that we find the places in the Cfst where silence is
537    used, and put the slot marker arcs in parallel.  This also causes the
538    models before the slot to assume silence to the right, and the models after
539    the slot to assume silence to the left, both of which are reasonable */
540 
FstAddSlotMarkersToCFst(fst::StdVectorFst & cfst_,int myMin,int myMax)541 ESR_ReturnCode FstAddSlotMarkersToCFst( fst::StdVectorFst& cfst_, int myMin, int myMax)
542 {
543   int num_silence_arcs_in_cfst = 0;
544   int mimicPhonemeCode = SILENCE_CODE;
545 
546   fst::StdArc::StateId s = cfst_.Start();
547   if (s == fst::kNoStateId)
548     return ESR_INVALID_ARGUMENT;
549   for (fst::StateIterator< fst::StdVectorFst> siter(cfst_);
550        !siter.Done(); siter.Next()) {
551     s = siter.Value();
552 
553     for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&cfst_, s);
554 	!aiter.Done(); aiter.Next()) {
555       fst::StdArc arc = aiter.Value();
556       if( arc.olabel == mimicPhonemeCode) {
557 	num_silence_arcs_in_cfst++;
558 	for(int i=myMin; i<myMax; i++)
559 	  cfst_.AddArc( s, fst::StdArc(SLOT_COUNTER_OFFSET+i /*model*/,
560 				       SLOT_COUNTER_OFFSET+i /*phoneme*/, 0.0, arc.nextstate));
561       }
562     }
563   }
564   fst::ArcSort(&cfst_, fst::StdOLabelCompare());
565   if(!num_silence_arcs_in_cfst)
566     return ESR_INVALID_ARGUMENT;
567   else
568     return ESR_SUCCESS;
569 }
570 
571 /*
572  * make the graphs used by the recognition engine during the search.
573  */
574 
make_openfst_graphs(GRXMLDoc * pDoc,const std::string & grxmlBasename,const char * vocabFilename,const char * cfstFilename,const char * modelmapFilename)575 ESR_ReturnCode make_openfst_graphs(  GRXMLDoc* pDoc,
576 				     const std::string& grxmlBasename,
577 				     const char* vocabFilename,
578 				     const char* cfstFilename,
579 				     const char* modelmapFilename)
580 {
581   SR_Vocabulary *vocab = 0;
582   ESR_ReturnCode rc;
583 
584   fst::StdVectorFst l_fst;      // .L file, created from the .map and .ok
585 
586   int stateSt, stateEn;
587   size_t len;
588   bool do_skip_interword_silence = false;
589   std::unordered_map<string,int> homonym_count;
590   int word_penalty = 0;
591 
592   rc = SR_VocabularyLoad(vocabFilename, &vocab);
593   if (rc != ESR_SUCCESS) {
594     cerr << "Error: " <<  ESR_rc2str(rc) << endl;
595     return ESR_INVALID_ARGUMENT; // goto CLEANUP;
596   }
597 
598   std::string word_penalty_str;
599   if( pDoc->findMeta(std::string("word_penalty"),word_penalty_str))
600     word_penalty = atoi((const char *)word_penalty_str.c_str());
601   else {
602     rc = ESR_SessionGetInt( L("CREC.Recognizer.wordpen"), &word_penalty);
603     if(rc != ESR_SUCCESS)
604       word_penalty = DEFAULT_WB_COST;
605     word_penalty_str = DEFAULT_WB_COST_STR;
606     pDoc->setMeta( std::string("word_penalty"), word_penalty_str) ;
607     cout << "using word_penalty " << word_penalty << endl;
608   }
609 
610   std::string do_skip_interword_silence_str;
611   if( pDoc->findMeta(std::string("do_skip_interword_silence"), do_skip_interword_silence_str))
612     do_skip_interword_silence = ((do_skip_interword_silence_str != "true") ? false : true);
613 
614   /*-----------------------------------------------------------------*
615    *   read the .map and .omap created from grxmlcompiler classes    *
616    *-----------------------------------------------------------------*/
617 
618   std::string omapFilename = grxmlBasename + std::string(".omap");
619   std::string imapFilename = grxmlBasename + std::string(".map");
620 
621   cout << "info: reading word symbols " << imapFilename << endl;
622   fst::SymbolTable *word_syms = fst::SymbolTable::ReadText(imapFilename);
623   if(!word_syms) {
624     cerr << "error: reading word_syms" << endl;
625     return ESR_INVALID_ARGUMENT;
626   }
627   cout << "info: reading parser symbols " << omapFilename << endl;
628   fst::SymbolTable *prsr_syms = fst::SymbolTable::ReadText(omapFilename);
629   if(!prsr_syms) {
630     cerr << "error: reading prsr_syms" << endl;
631     return ESR_INVALID_ARGUMENT;
632   }
633   cout << "info: reading model symbols " << modelmapFilename << endl;
634   fst::SymbolTable *model_syms = fst::SymbolTable::ReadText(modelmapFilename);
635   if(!prsr_syms) {
636     cerr << "error: reading prsr_syms" << endl;
637     return ESR_INVALID_ARGUMENT;
638   }
639   int max_model_sym = 0;
640   /* if(1) {
641      fst::SymbolTableIterator iter( *model_syms);
642      for(iter.Reset(); !iter.Done(); iter.Next() ) max_model_sym++; */
643 
644   /*-----------------------------------------------------------------*
645    * create the .L pronunciations transducer                         *
646    *-----------------------------------------------------------------*/
647 
648   // Adds state 0 to the initially empty FST and make it the start state.
649   stateSt = l_fst.AddState();
650   stateEn = l_fst.AddState();
651   l_fst.SetStart(stateSt);  // arg is state ID
652   l_fst.SetFinal(stateEn, 0.0);  // 1st arg is state ID, 2nd arg weight
653   l_fst.AddArc(stateEn, fst::StdArc(EPSILON_LABEL,EPSILON_LABEL,0.0,stateSt));
654 
655   int num_slots = 0;
656   fst::SymbolTableIterator iter( *word_syms);
657   for(iter.Reset(); !iter.Done(); iter.Next() ) {
658     ESR_ReturnCode rc;
659     LCHAR prons[MAX_PRONS_LENGTH];
660     const char* phrase = iter.Symbol();
661     int wordId = iter.Value();
662     bool wordId_is_silence = false;
663     bool wordId_is_slot    = false;
664     /* script or scope marker, skip it */
665     /* if( is_scope_marker( phrase) || is_script_marker(phrase))
666        continue; */
667     /* epsilon */
668     if(!strcmp( phrase, SILENCE_PREFIX_WORD)
669        || !strcmp(phrase,SILENCE_SUFFIX_WORD))
670       wordId_is_silence = true;
671     else if( !strcmp( phrase, "eps") && wordId == 0)
672       continue;
673     /* rule markers */
674     else if( strstr( phrase, ".grxml@"))
675       continue;
676     /* script markers */
677     else if( phrase[0]=='_' && strspn(phrase+1,"0123456789")==strlen(phrase+1))
678       continue;
679     else if(is_slot_symbol(phrase)) {
680       cout << "SLOT>> " << phrase << endl;
681       wordId_is_slot = true;
682       num_slots++;
683     }
684 
685     if(num_slots > MAX_NUM_SLOTS) {
686       std::cout << "Error: SREC may have trouble with this many slots! (" << num_slots << ")" << std::endl;
687       // return ESR_NOT_SUPPORTED;
688     }
689 
690     if(wordId_is_slot) {
691       int stateP = stateSt, statePp1;
692       /* with 2 arcs, we have a better chance to merge the slot if used from
693 	 different parts of the grammar, see FstPushSlotLikeOLabels elsewhere */
694       statePp1 = l_fst.AddState();
695       l_fst.AddArc(stateP, fst::StdArc( wordId+SLOT_COUNTER_OFFSET, wordId, 0.0, statePp1));
696       stateP = statePp1;
697       statePp1 = l_fst.AddState();
698       l_fst.AddArc(stateP, fst::StdArc( wordId+SLOT_COUNTER_OFFSET, EPSILON_LABEL, 0.0, statePp1));
699       stateP = statePp1;
700       l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
701     } else {
702       size_t len_used;
703       LCHAR *pron = 0, *p;
704       /* word is ok, get the pron */
705       len = MAX_PRONS_LENGTH;
706       rc = SR_VocabularyGetPronunciation(vocab, phrase, prons, &len);
707       if (rc != ESR_SUCCESS) {
708 	LPRINTF( "ERROR: SR_VocabularyGetPronunciation(*,%s,*,*) returned %s\n", phrase, ESR_rc2str(rc));
709 	SR_VocabularyDestroy(vocab);
710 	return rc;
711       }
712       for(len_used=0; len_used<len; ) {
713 	pron = &prons[0]+len_used;
714 	len_used += LSTRLEN(pron)+1;
715 	if( *pron == 0) break;
716 	int stateP = stateSt, statePp1;
717 	int olabel = wordId;
718 	LPRINTF("%s : %s\n", phrase, pron);
719 	/* main pronunciation */
720 	for(p=pron; *p; p++) {
721 	  statePp1 = l_fst.AddState();
722 	  if(*p == OPTSILENCE_CODE) {
723 	    l_fst.AddArc(stateP, fst::StdArc( SILENCE_CODE, olabel, 0.0, statePp1));
724 	    l_fst.AddArc(stateP, fst::StdArc( EPSILON_LABEL, olabel, 0.0, statePp1));
725 	  } else {
726 	    l_fst.AddArc(stateP, fst::StdArc( *p, olabel, 0.0, statePp1));
727 	  }
728 	  stateP = statePp1;
729 	  olabel = EPSILON_LABEL;
730 	}
731 	/* add epsilons if this is a homonym */
732 	string pron_string = pron;
733 	std::unordered_map<string,int>::const_iterator it = homonym_count.find( pron_string);
734 	if(it == homonym_count.end()) {
735 	  homonym_count[ pron_string] = 0;
736 	} else {
737 	  homonym_count[ pron_string] = homonym_count[ pron_string]+1;
738 	}
739 	int extra_epsilons_needed = homonym_count[ pron_string] ;
740 	if(wordId_is_silence) extra_epsilons_needed = 0;
741 	for(int i=0;i<extra_epsilons_needed;i++) {
742 	  statePp1 = l_fst.AddState();
743 	  l_fst.AddArc(stateP, fst::StdArc( EXTRA_EPSILON_LABEL, olabel, 0.0, statePp1));
744 	  stateP = statePp1;
745 	}
746 	/* add optional silence after each word */
747 	if(!do_skip_interword_silence && !wordId_is_silence && !wordId_is_slot) {
748 	  statePp1 = l_fst.AddState();
749 	  l_fst.AddArc(stateP, fst::StdArc( SILENCE_CODE, EPSILON_LABEL, 0.0, statePp1));
750 	  l_fst.AddArc(statePp1, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
751 	  l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
752 	} else if(wordId_is_silence && !strcmp(phrase, SILENCE_SUFFIX_WORD)) {
753 	  /* SILENCE_SUFFIX_WORD does not need a terminal .wb */
754 	  l_fst.AddArc(stateP, fst::StdArc( EPSILON_LABEL, EPSILON_LABEL, 0.0, stateEn));
755 	} else {
756 	  l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
757 	}
758       } // loop over multiple prons
759     } // slot vs non-slot
760   } /* .map (word_syms) iterator */
761 
762   std::string lfstFilename = grxmlBasename + ".L";
763   // We can save this FST to a file with:
764   if(debug) l_fst.Write(lfstFilename.c_str());
765 
766   /*-----------------------------------------------------------------*
767    *   read the .P.txt created from grxmlcompiler classes            *
768    *-----------------------------------------------------------------*/
769 
770   std::string ptxtFilename = grxmlBasename + std::string(".P.txt");
771   std::ifstream istrm(ptxtFilename.c_str());
772   if(!istrm) {
773     cerr << "error: reading ptxtFilename" << endl;
774     return ESR_INVALID_ARGUMENT;
775   }
776 
777   cout << "info: reading parser from text " << ptxtFilename << endl;
778   fst::FstReader<fst::StdArc> reader( istrm, ptxtFilename, word_syms, prsr_syms,
779 				      /*state_syms*/ NULL,
780 				      /*acceptor*/ false,
781 				      /*ikeep*/ false,
782 				      /*okeep*/ false,
783 				      /*nkeep*/ false);
784   // .P file, created from the .P.txt and .omap
785   const fst::StdVectorFst& p_fst = reader.Fst();
786 
787   /*-----------------------------------------------------------------*
788    *   make the helper FSTs                                          *
789    *-----------------------------------------------------------------*/
790 
791   cout << "info: creating helper fsts" << endl;
792   fst::StdVectorFst prefix_fst;
793   fst::StdVectorFst suffix_fst;
794   fst::StdVectorFst eps_fst;
795   // int eps_word = StrToId("eps", word_syms, "arc ilabel");
796   int pau_word = StrToId(SILENCE_PREFIX_WORD, word_syms, "arc ilabel");
797   int pau2_word = StrToId(SILENCE_SUFFIX_WORD, word_syms, "arc ilabel");
798   if(pau_word < 0 || pau2_word < 0)
799     return ESR_INVALID_ARGUMENT;
800 
801   stateSt = prefix_fst.AddState();
802   stateEn = prefix_fst.AddState();
803   prefix_fst.SetStart(stateSt);  // arg is state ID
804   prefix_fst.SetFinal(stateEn, 0.0);  // 1st arg is state ID, 2nd arg weight
805   prefix_fst.AddArc(stateSt, fst::StdArc(pau_word, pau_word, 0.0, stateEn));
806 
807   stateSt = suffix_fst.AddState();
808   stateEn = suffix_fst.AddState();
809   suffix_fst.SetStart(stateSt);  // arg is state ID
810   suffix_fst.SetFinal(stateEn, 0.0);  // 1st arg is state ID, 2nd arg weight
811   suffix_fst.AddArc(stateSt, fst::StdArc(pau2_word, pau2_word, 0.0, stateEn));
812 
813   stateSt = eps_fst.AddState();
814   stateEn = stateSt; // stateEn = eps_fst.AddState();
815   eps_fst.SetStart(stateSt);  // arg is state ID
816   eps_fst.SetFinal(stateEn, 0.0);  // 1st arg is state ID, 2nd arg weight
817   // eps_fst.AddArc(stateSt, fst::StdArc(eps_word, eps_word, 0.0, stateEn));
818 
819   /*-----------------------------------------------------------------*
820    *    make Grev2.det.txt                                           *
821    *-----------------------------------------------------------------*/
822   cout << "info: creating reverse g fst" << endl;
823   fst::StdVectorFst g_fst = p_fst;   // this is a copy!!
824   fst::StdVectorFst grev_fst;        // reversed
825   fst::StdVectorFst grev_min_fst;    // eps removed and minimized
826   fst::StdVectorFst grev_det_fst;
827 
828   fst::Project(&g_fst, fst::PROJECT_INPUT);
829   if(debug) g_fst.Write( grxmlBasename + ".G");
830   fst::Reverse( g_fst, &grev_fst);
831   if(debug) grev_fst.Write( grxmlBasename + ".Grev");
832   fst::RmEpsilon( &grev_fst, /*connect?*/ true );
833   if(debug) grev_fst.Write( grxmlBasename + ".Grevrme");
834   fst::Determinize(grev_fst, &grev_det_fst);
835   if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedet");
836   if(1) fst::Minimize(&grev_det_fst);
837   if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedetmin");
838   fst::Concat( &eps_fst, grev_det_fst);
839   grev_det_fst = eps_fst;
840   if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedetmin2");
841   std::string grevFilename = grxmlBasename + std::string(".Grev2.det.txt");
842 
843   cout << "info: writing reverse G fst as text " << grevFilename << endl;
844   ostream* ostrm1 = new ofstream( grevFilename.c_str(), ios_base::out);
845   fst::FstPrinter<fst::StdArc> printer1( grev_det_fst,
846 					word_syms, word_syms,
847 					 NULL, /*acceptor?*/ true);
848   printer1.Print( ostrm1, grevFilename);
849   delete ostrm1;
850 
851   /*-----------------------------------------------------------------*
852    *    make PCLG.txt                                                *
853    *-----------------------------------------------------------------*/
854 
855   fst::StdVectorFst* c_fst;
856   fst::StdVectorFst lg_fst;
857   fst::StdVectorFst clg_fst;
858   fst::StdVectorFst clg_det_fst;
859 
860   cout << "info: reading model fst " << cfstFilename << endl;
861   c_fst = fst::StdVectorFst::Read( cfstFilename);
862 
863   int slot_olabel_min=0, slot_olabel_max=0; // [min,max) .. ie excludes max
864   get_slot_olabel_range( word_syms, &slot_olabel_min, &slot_olabel_max);
865   if(slot_olabel_max > MAX_NUM_SLOTS)
866     std::cout << "Error: SREC may have trouble with this many slots! (" << slot_olabel_max << ")" << std::endl;
867 
868   /* add slot markers as if they were silence phonemes, this makes the context
869      for them as if the slot were silence, which is reasonable, although another
870      reasonable thing would be to allow all contexts.  Adding the true context
871      only would add complexity and slow down word addition too much. */
872 
873   rc = FstAddSlotMarkersToCFst( *c_fst, slot_olabel_min, slot_olabel_max);
874   if(rc) return rc;
875 
876   fst::Concat( &g_fst, suffix_fst);
877   fst::Concat( &prefix_fst, g_fst);
878   if(debug) prefix_fst.Write( grxmlBasename + ".G2");
879   fst::ComposeOptions copts( /*connect?*/ true);
880 
881   fst::ArcSort(&l_fst, fst::StdOLabelCompare());
882   fst::ArcSort(&prefix_fst, fst::StdILabelCompare());
883 
884   fst::Compose(l_fst, prefix_fst, &lg_fst, copts);
885   if(debug) lg_fst.Write( grxmlBasename + ".LG");
886   fst::ArcSort(&lg_fst, fst::StdILabelCompare());
887   if(debug) lg_fst.Write( grxmlBasename + ".LG2");
888 
889   fst::RmEpsilon( &lg_fst, /*connect?*/ true );
890   if(debug) lg_fst.Write( grxmlBasename + ".LGrme");
891   fst::Determinize( lg_fst, &clg_fst); // clg_fst is really lg_det_fst!
892   if(debug) clg_fst.Write( grxmlBasename + ".LGrmedet");
893   rc = FstReplaceILabel( clg_fst, EXTRA_EPSILON_LABEL, EPSILON_LABEL);
894   fst::Compose( *c_fst, clg_fst, &clg_det_fst, copts);
895   if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet");
896 
897   rc = FstMergeOLabelsToILabels_GetMax( clg_det_fst, /*int&*/max_model_sym);
898   if(verbose)
899     cout << "info: merging into ilabels I=i+" << max_model_sym << "*o" << endl;
900   rc = FstMergeOLabelsToILabels( clg_det_fst, max_model_sym);
901   if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet2");
902   fst::Minimize( &clg_det_fst);
903   if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet3");
904   if(verbose)
905     cout << "info: splitting from ilabels" << endl;
906   rc = FstSplitOLabelsFromILabels( clg_det_fst, max_model_sym);
907   if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet4");
908 
909   rc = FstPushSlotLikeOLabels( clg_det_fst, slot_olabel_min, slot_olabel_max);
910   if(rc != ESR_SUCCESS)
911         std::cout << "Error: FstPushSlotLikeOLabels() failed" << std::endl;
912   if(debug) clg_det_fst.Write( grxmlBasename + ".CLG");
913 
914   std::string pclgFilename = grxmlBasename + ".PCLG.txt";
915   ostream* ostrm = new ofstream( pclgFilename.c_str(), ios_base::out);
916   fst::FstPrinter<fst::StdArc> printer( clg_det_fst,
917 					model_syms, word_syms,
918 					NULL, /*acceptor?*/ false);
919   printer.Print( ostrm, pclgFilename);
920   delete ostrm;
921 
922   delete c_fst;
923   delete word_syms;  word_syms = NULL;
924   delete prsr_syms;  prsr_syms = NULL;
925   delete model_syms; model_syms = NULL;
926 
927   /*-----------------------------------------------------------------*
928    *    cleanup                                                      *
929    *-----------------------------------------------------------------*/
930 
931   if(vocab) {
932     SR_VocabularyDestroy(vocab);
933     vocab = NULL;
934   }
935 
936   return rc;
937 
938 }
939 
940 
941