1 /*---------------------------------------------------------------------------*
2 * grxmlcompile.cpp *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
#include "ptypes.h"

#include <ext/hash_map>

#include "fst/lib/fst.h"
#include "fst/lib/fstlib.h"
#include "fst/lib/arc.h"
#include "fst/lib/fst-decl.h"
#include "fst/lib/vector-fst.h"
#include "fst/lib/arcsort.h"
#include "fst/lib/invert.h"

#include "fst-io.h"

#include "ESR_Locale.h"
#include "LCHAR.h"
#include "pstdio.h"
#include "PFileSystem.h"
#include "PANSIFileSystem.h"
#include "plog.h"
#include "pmemory.h"
#include "ESR_Session.h"
#include "SR_Session.h"
#include "SR_Vocabulary.h"
#include "srec_arb.h" // for EPSILON_LABEL etc
#include <fstream>
#include <iostream>
#include <vector>
#include "tinyxml.h"
#include "grxmldoc.h"
49
50 #ifdef MEMTRACE
51 #include <mcheck.h>
52 #endif
53
/* Third-party acknowledgement for the OpenFst library. main() prints it to
   stdout only when GRXMLCOMPILE_PRINT_ACKNOWLEDGEMENT is defined. */
#define OPENFST_ACKNOWLEDGEMENT \
"This tool uses the OpenFst library. \n" \
"Licensed under the Apache License, Version 2.0 (the \"License\");\n" \
" you may not use this file except in compliance with the License.\n" \
" You may obtain a copy of the License at" \
"\n" \
" http://www.apache.org/licenses/LICENSE-2.0\n" \
"\n" \
" Unless required by applicable law or agreed to in writing, software\n" \
" distributed under the License is distributed on an \"AS IS\" BASIS,\n" \
" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \
" See the License for the specific language governing permissions and\n" \
" limitations under the License.\n" \
"\n" \
" This library was developed at Google Research (M. Riley, J. Schalkwyk, W. Skut) and NYU's Courant Institute (C. Allauzen, M. Mohri). It is intended to be comprehensive, flexible, efficient and scale well to large problems. It is an open source project distributed under the Apache license. \n"


/* Third-party acknowledgement for the TinyXML library. main() prints it to
   stdout only when GRXMLCOMPILE_PRINT_ACKNOWLEDGEMENT is defined.
   NOTE(review): the text ends right after "the following restrictions:" and
   lists none -- confirm whether the restriction list was dropped by mistake. */
#define TINYXML_ACKNOWLEDGEMENT \
"This tool uses the tinyxml library. \n" \
"Copyright (c) 2007 Project Admins: leethomason \n" \
"The TinyXML software is provided 'as-is', without any express or implied\n" \
"warranty. In no event will the authors be held liable for any damages\n" \
"arising from the use of this software.\n" \
"\n" \
"Permission is granted to anyone to use this software for any purpose,\n" \
"including commercial applications, and to alter it and redistribute it\n" \
"freely, subject to the following restrictions:\n"

/* License banner of the tool itself. main() prints it to stdout only when
   GRXMLCOMPILE_PRINT_ACKNOWLEDGEMENT is defined. */
#define NUANCE_COPYRIGHT \
"// grxmlcompile\n" \
"//\n" \
"// Licensed under the Apache License, Version 2.0 (the \"License\");\n" \
"// you may not use this file except in compliance with the License.\n" \
"// You may obtain a copy of the License at\n" \
"//\n" \
"// http://www.apache.org/licenses/LICENSE-2.0\n" \
"//\n" \
"// Unless required by applicable law or agreed to in writing, software\n" \
"// distributed under the License is distributed on an \"AS IS\" BASIS,\n" \
"// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" \
"// See the License for the specific language governing permissions and\n" \
"// limitations under the License.\n" \
"//\n" \
"// This program compiles a .grxml grammar into the graphs needed for \n" \
"// decoding with SREC\n" \
"// \n"
100
/* NOTE(review): MAX_LINE_LENGTH is not referenced anywhere in this file. */
#define MAX_LINE_LENGTH 256
/* Size of the fixed path buffers (vocfile, cfstfile, modelmapfile) in main(). */
#define MAX_PATH_NAME 512
/* Size of the buffer that receives the pronunciations of one word. */
#define MAX_PRONS_LENGTH 1024
/* Word symbols that are treated as silence when the .L transducer is built;
   they are also the single arcs of the prefix/suffix helper FSTs. */
#define SILENCE_PREFIX_WORD "-pau-"
#define SILENCE_SUFFIX_WORD "-pau2-"
/* Markers that is_slot_symbol() looks for at the end / inside a symbol. */
#define SLOT_SUFFIX "__"
#define SLOT_PREFIX "__"
#define MAX_NUM_SLOTS 12 /* must agree with srec_context.h */
/* Temporary input label appended to homonym pronunciations; it is mapped
   back to EPSILON_LABEL by FstReplaceILabel() after determinization. */
#define EXTRA_EPSILON_LABEL 39999 // must be higher than the number of models
/* Fallback word penalty (as int and as string) used when neither the grammar
   meta data nor the session provides one. */
#define DEFAULT_WB_COST 40
#define DEFAULT_WB_COST_STR "40"
/* Added to a slot's word id to form the input label of its marker arcs. */
#define SLOT_COUNTER_OFFSET 30000 // must be higher than the number of models
/* NOTE(review): NOISE_PHONEME_CODE is not referenced anywhere in this file. */
#define NOISE_PHONEME_CODE 'J'

/* Incremented once per "-debug" argument; when non-zero the intermediate
   FSTs are written next to the output files. */
static int debug = 0;
/* Incremented once per "-verbose" argument; enables extra "info:" output. */
static int verbose = 0;

using namespace std;

/* Forward declaration; the definition is at the end of this file. */
ESR_ReturnCode make_openfst_graphs(GRXMLDoc* pDoc, /* for metas */
const std::string& grxmlBasename,
const char* vocabFilename,
const char* cfstFilename,
const char* modelmapFilename);
125
/* Returns the text of line `line_num` (0-based) of the file `fn`.
 *
 * The result points into a function-local static buffer, so it is only valid
 * until the next call and the function is not reentrant. Reading stops as soon
 * as the requested line has been read or the stream fails (end of file, or a
 * line that does not fit into the buffer); whatever the last getline() call
 * left in the buffer is returned in that case. If the file cannot be opened
 * the buffer is returned unchanged.
 */
const char* showline(const char* fn, int line_num)
{
  static char line[8096] = { 0 };
  std::ifstream strm(fn);
  for (int index = 0; strm && strm.getline(line, sizeof(line)); ++index) {
    if (index == line_num)
      break;
  }
  return line;
}
135
/* Returns the part of `full` that follows the last '/' character, or `full`
 * itself when it contains no '/'. Only forward slashes are recognised.
 */
std::string ExtractFileName(const std::string& full)
{
  const std::string::size_type last_slash = full.find_last_of("/");
  return (last_slash == std::string::npos) ? full : full.substr(last_slash + 1);
}
145
146 /*-----------------------------------------------------------------------*
147 * *
148 * *
149 *-----------------------------------------------------------------------*/
150
usage_error(const char * prgname)151 int usage_error(const char* prgname)
152 {
153 printf("USAGE: -par <par file> -grxml <grxml grammar file> -vocab <dictionary file (.ok)> [-outdir <output directory>]\n");
154 return (int)ESR_INVALID_ARGUMENT;
155 }
156
main(int argc,char * argv[])157 int main(int argc, char* argv[])
158 {
159 ESR_ReturnCode status = ESR_SUCCESS;
160 char *parfile = NULL;
161 char *grxmlfile = NULL;
162 char *cmdline_vocfile = NULL;
163 std::string outdir("."); // default output dir is current directory
164 /* for now, assume char and LCHAR are the same, else fail to compile! */
165 { char zzz[ 1 - (sizeof(LCHAR)!=sizeof(char))]; zzz[0] = 0; }
166
167 #ifdef MEMTRACE
168 mtrace();
169 #endif
170
171 #if defined(GRXMLCOMPILE_PRINT_ACKNOWLEDGEMENT)
172 cout << OPENFST_ACKNOWLEDGEMENT <<std::endl;
173 cout << TINYXML_ACKNOWLEDGEMENT <<std::endl;
174 cout << NUANCE_COPYRIGHT <<std::endl;
175 #endif
176
177 // Process all XML files given on command line
178
179 if(argc<5){
180 return usage_error(argv[0]);
181 }
182
183 for(int i=1;i<argc;i++)
184 {
185 if(!strcmp(argv[i],"-grxml"))
186 grxmlfile = argv[++i];
187 else if(!strcmp(argv[i],"-debug"))
188 debug++;
189 else if(!strcmp(argv[i],"-verbose"))
190 verbose++;
191 else if(!strcmp(argv[i],"-par") || !strcmp(argv[i],"-parfile"))
192 parfile = argv[++i];
193 else if(!strcmp(argv[i],"-vocab"))
194 cmdline_vocfile = argv[++i];
195 else if(!strcmp(argv[i],"-outdir"))
196 outdir = std::string(argv[++i]);
197 else {
198 printf("error_usage: argument [%s]\n", argv[i]);
199 return usage_error(argv[0]);
200 return (int)ESR_INVALID_ARGUMENT;
201 }
202 }
203
204 //process_xml( std::string(grxmlfile), parfile );
205 std::string filename = std::string(grxmlfile);
206
207 /***************************
208 process xml
209 ***************************/
210
211 cout << "processing [" << filename << "] ..." << endl;
212
213 TiXmlDocument node;
214 bool bLoadedOK = node.LoadFile( filename.c_str() );
215 if(!bLoadedOK || node.Error()) {
216 std::cout << "Error: while creating TiXmlDocument from " << filename << std::endl;
217 std::cout << "Error: " << node.Error() << " id " << node.ErrorId() << " row " << node.ErrorRow() << " col " << node.ErrorCol() << std::endl;
218 std::cout << "Error: " << node.ErrorDesc() << std::endl;
219 std::cout << "Error: near " << showline( filename.c_str(), node.ErrorRow()) << std::endl;
220 return (int)ESR_INVALID_ARGUMENT;
221 }
222
223
224 // *************************************************
225 // Parse the file into a DOM object and create word graph
226 //
227 GRXMLDoc *doc = new (GRXMLDoc);
228 std::string filenameNoPath = ExtractFileName(filename);
229 doc->parseGrammar( node, filenameNoPath ); // THE PARSING AND NETWORK BUILD HAPPENS IN HERE
230 /************************
231 end of xml processing
232 ************************/
233
234 // Create grammar network files. Use prefix of input file for output.
235 std::string s = filename;
236 std::string grxmlbase = outdir + "/" + ExtractFileName(grxmlfile);
237 unsigned int p1 = grxmlbase.find_last_of(".");
238 if ( p1 != string::npos )
239 grxmlbase.assign( grxmlbase, 0, p1);
240
241 std::string newName;
242 newName = grxmlbase + ".map";
243 doc->writeMapFile( newName );
244 newName = grxmlbase + ".script";
245 doc->writeScriptFile( newName );
246
247 doc->writeGraphFiles( grxmlbase, false );
248
249 //
250 // SR initialization
251 //
252 char vocfile[MAX_PATH_NAME];
253 char cfstfile[MAX_PATH_NAME];
254 char modelmapfile[MAX_PATH_NAME];
255 size_t len;
256
257 PMemInit();
258 printf("info: Using parfile %s\n",parfile);
259 status = SR_SessionCreate((const LCHAR*) parfile);
260 // status = SR_SessionCreate ( parfile );
261 if ( status != ESR_SUCCESS ) {
262 LPRINTF("Error: SR_SessionCreate(%s) %s\n", parfile, ESR_rc2str(status));
263 return (int)status;
264 }
265
266 // vocfile
267 if(cmdline_vocfile) {
268 strcpy( vocfile, cmdline_vocfile);
269 } else {
270 len = MAX_PATH_NAME;
271 ESR_SessionGetLCHAR ( L("cmdline.vocabulary"), (LCHAR*)vocfile, &len );
272 // skip PrefixWithBaseDirectory(), 'tis done inside SR_VocabularyLoad()
273 }
274 printf("info: Using dictionary %s\n",vocfile);
275
276 // modelmapfile
277 len = MAX_PATH_NAME;
278 ESR_SessionGetLCHAR ( L("cmdline.arbfile"), (LCHAR*)modelmapfile, &len);
279 len = MAX_PATH_NAME;
280 status = ESR_SessionPrefixWithBaseDirectory ( (LCHAR*)modelmapfile, &len);
281 char* p = strrchr(modelmapfile,'/');
282 if(!p) p = strrchr(modelmapfile,'\\');
283 if(p) strcpy(p, "/models128x.map");
284
285 // cfstfile
286 len = MAX_PATH_NAME;
287 ESR_SessionGetLCHAR ( L("cmdline.arbfile"), (LCHAR*)cfstfile, &len);
288 len = MAX_PATH_NAME;
289 status = ESR_SessionPrefixWithBaseDirectory ( (LCHAR*)cfstfile, &len);
290 p = strrchr(cfstfile,'/');
291 if(!p) p = strrchr(cfstfile,'\\');
292 if(p) strcpy(p, "/generic.C");
293
294 status = make_openfst_graphs( doc, grxmlbase, (const char*)vocfile, (const char*)cfstfile, (const char*)modelmapfile);
295 if(status != ESR_SUCCESS) {
296 LPRINTF("Error: make_openfst_graphs() returned %s\n", ESR_rc2str(status));
297 } else {
298 /* make_openfst_graphs() can sometimes call doc->setMeta() to put
299 Session parameters into the .params file, so writeParamsFile()
300 should be called after make_openfst_graphs() */
301 newName = grxmlbase + ".params";
302 doc->writeParamsFile( newName );
303 }
304
305 //
306 // SR de-initialization
307 //
308 SR_SessionDestroy();
309 PMemShutdown();
310
311 delete doc;
312 return (int)status;
313 }
314
/*-----------------------------------------------------------------*
 *  utils                                                          *
 *-----------------------------------------------------------------*/

/* Tells whether `sym` names a slot.
 *
 * A slot symbol is longer than four characters, ends with SLOT_SUFFIX and
 * contains SLOT_PREFIX somewhere before its last two characters.
 */
bool is_slot_symbol( const char* sym)
{
  const int len = strlen(sym);
  if (len <= 4)
    return false;
  if (strcmp(sym + len - 2, SLOT_SUFFIX) != 0)
    return false;
  /* the suffix matched, so the search below cannot fail */
  const char* first_marker = strstr(sym, SLOT_PREFIX);
  return (first_marker - sym) < len - 2;
}
328
StrToId(const char * s,fst::SymbolTable * syms,const char * name)329 int64 StrToId(const char *s, fst::SymbolTable *syms,
330 const char *name)
331 {
332 int64 n;
333 if (syms) {
334 n = syms->Find(s);
335 if (n < 0) {
336 cerr << "FstReader: Symbol \"" << s
337 << "\" is not mapped to any integer " << name
338 << ", symbol table = " << syms->Name();
339 }
340 } else {
341 char *p;
342 n = strtoll(s, &p, 10);
343 if (p < s + strlen(s) || n < 0) {
344 cerr << "FstReader: Bad " << name << " integer = \"" << s;
345 }
346 }
347 return n;
348 }
349
350 /* FstMergeOLabelsToILabels, FstSplitOLabelsFromILabels
351 are used to make sure the minimization does not go overboard in pushing
352 output labels toward the beginning of the graph. When that happens
353 then the speech recognition decoder fails! */
354
FstMergeOLabelsToILabels(fst::StdVectorFst & fst_,int max_ilabels)355 ESR_ReturnCode FstMergeOLabelsToILabels( fst::StdVectorFst& fst_, int max_ilabels )
356 {
357 fst::StdArc::StateId s = fst_.Start();
358 if (s == fst::kNoStateId)
359 return ESR_INVALID_ARGUMENT;
360 for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
361 !siter.Done(); siter.Next()) {
362 s = siter.Value();
363
364 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
365 !aiter.Done(); aiter.Next()) {
366 fst::StdArc arc = aiter.Value();
367 if( arc.ilabel >= max_ilabels ||
368 (float)arc.ilabel + ((float)max_ilabels)*arc.olabel > INT_MAX) {
369 std::cout << "Error: internal error in FstMergeOLabelsToILabels() " << std::endl;
370 return ESR_NOT_IMPLEMENTED;
371 }
372 arc.ilabel = arc.ilabel + max_ilabels * arc.olabel;
373 arc.olabel = 0;
374 aiter.SetValue( arc);
375 }
376 }
377 return ESR_SUCCESS;
378 }
379
FstMergeOLabelsToILabels_GetMax(fst::StdVectorFst & fst_,int & max_ilabel)380 ESR_ReturnCode FstMergeOLabelsToILabels_GetMax( fst::StdVectorFst& fst_, int& max_ilabel )
381 {
382 if (fst_.Start() == fst::kNoStateId) return ESR_INVALID_ARGUMENT;
383 for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
384 !siter.Done(); siter.Next()) {
385 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, siter.Value());
386 !aiter.Done(); aiter.Next()) {
387 if( aiter.Value().ilabel > max_ilabel)
388 max_ilabel = aiter.Value().ilabel;
389 }
390 }
391 max_ilabel++;
392 return ESR_SUCCESS;
393 }
394
FstSplitOLabelsFromILabels(fst::StdVectorFst & fst_,int max_ilabels)395 ESR_ReturnCode FstSplitOLabelsFromILabels( fst::StdVectorFst& fst_, int max_ilabels )
396 {
397 fst::StdArc::StateId s = fst_.Start();
398 if (s == fst::kNoStateId)
399 return ESR_INVALID_ARGUMENT;
400 for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
401 !siter.Done(); siter.Next()) {
402 s = siter.Value();
403
404 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
405 !aiter.Done(); aiter.Next()) {
406 fst::StdArc arc = aiter.Value();
407 arc.olabel = arc.ilabel / max_ilabels;
408 arc.ilabel = arc.ilabel - arc.olabel*max_ilabels;
409 aiter.SetValue( arc);
410 }
411 }
412 return ESR_SUCCESS;
413 }
414
415 /* this is to replace the "fake" extra epsilon input labels, which were
416 put there to disambiguate homonyms */
417
FstReplaceILabel(fst::StdVectorFst & fst_,int from_ilabel,int into_ilabel)418 ESR_ReturnCode FstReplaceILabel( fst::StdVectorFst& fst_, int from_ilabel, int into_ilabel)
419 {
420 fst::StdArc::StateId s = fst_.Start();
421 if (s == fst::kNoStateId)
422 return ESR_INVALID_ARGUMENT;
423 for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
424 !siter.Done(); siter.Next()) {
425 s = siter.Value();
426
427 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
428 !aiter.Done(); aiter.Next()) {
429 fst::StdArc arc = aiter.Value();
430 if(arc.ilabel == from_ilabel) {
431 arc.ilabel = into_ilabel;
432 aiter.SetValue( arc);
433 }
434 }
435 }
436 return ESR_SUCCESS;
437 }
438
439 /* this pushes the slot labels forward which gives an opportunity for
440 multiple instances of the slot to be merged, eg. lookup NAME
441 vs lookup contact NAME .. if in separate rules, then they will
442 merge thanks to using 3 arcs for the NAME */
443
FstPushSlotLikeOLabels(fst::StdVectorFst & fst_,int myMin,int myMax)444 ESR_ReturnCode FstPushSlotLikeOLabels( fst::StdVectorFst& fst_, int myMin, int myMax)
445 {
446 int i;
447 ESR_ReturnCode rc = ESR_SUCCESS;
448 char done_for_state[2*65536]; // hope this is enough!
449 memset( &done_for_state[0], 0, sizeof(done_for_state));
450
451 fst::StdArc::StateId s = fst_.Start();
452 for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
453 !siter.Done(); siter.Next()) {
454 s = siter.Value();
455
456 if(done_for_state[ s]) continue;
457 done_for_state[ s]++;
458
459 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
460 !aiter.Done(); aiter.Next()) {
461 fst::StdArc arc = aiter.Value();
462 if(arc.olabel >= myMin && arc.olabel < myMax) {
463 fst::StdArc::StateId s2 = arc.nextstate;
464 int slotId = arc.olabel;
465
466 if(verbose)
467 std::cout << "info: FstPushSlotLikeOLabels() at state " << s << " arc ilabel " << arc.ilabel << " olabel " << arc.olabel << std::endl;
468
469 arc.ilabel = EPSILON_LABEL;
470 arc.olabel = EPSILON_LABEL;
471 arc.weight = 0; // zero weight
472 aiter.SetValue( arc);
473 done_for_state[ s2]++;
474 for(fst::MutableArcIterator<fst::StdVectorFst> aiter2(&fst_, s2);
475 !aiter2.Done(); aiter2.Next()) {
476 fst::StdArc arc2 = aiter2.Value();
477 if(arc2.ilabel == WORD_BOUNDARY) {
478 std::cout << "Error: FstPushSlotLikeOLabels() failing, there could be confusion between the slot (hack-pron) and a real-pron, the slot olabel may have been pushed by earlier fst operations!" << std::endl;
479 rc = ESR_INVALID_STATE;
480 } else
481 arc2.ilabel = EPSILON_LABEL;
482 arc2.olabel = slotId;
483 aiter2.SetValue( arc2);
484 }
485 }
486 }
487 }
488
489 /* check */
490 int *num_pclg_arcs_using_slot = new int[myMax];
491 for(i=0;i<myMax;i++) num_pclg_arcs_using_slot[i] = 0;
492 for (fst::StateIterator< fst::StdVectorFst> siter(fst_);
493 !siter.Done(); siter.Next()) {
494 s = siter.Value();
495
496 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&fst_, s);
497 !aiter.Done(); aiter.Next()) {
498 fst::StdArc arc = aiter.Value();
499 if(arc.olabel >= myMin && arc.olabel < myMax)
500 num_pclg_arcs_using_slot[arc.olabel]++;
501 }
502 }
503 for(i=0; i<myMax; i++) {
504 if(num_pclg_arcs_using_slot[i] > 1) {
505 std::cout << "Error: SREC will not support multiply referred slots." << std::endl;
506 std::cout << "Error: Consider re-working your grammar to merge the references into one rule" << std::endl;
507 std::cout << "Error: or use two different slots" << std::endl;
508 rc = ESR_NOT_SUPPORTED;
509 }
510 }
511 delete [] num_pclg_arcs_using_slot;
512
513 return rc;
514 }
515
516 /* gets the range of slot numbers, myMin inclusive, myMax is exclusive */
517
get_slot_olabel_range(const fst::SymbolTable * syms,int * myMin,int * myMax)518 void get_slot_olabel_range( const fst::SymbolTable* syms, int* myMin, int* myMax)
519 {
520 // assumes slots are at the top of the symbol table
521 fst::SymbolTableIterator iter( *syms);
522 *myMin = *myMax = 0;
523 for(iter.Reset(); !iter.Done(); iter.Next() ) {
524 const char* sym = iter.Symbol();
525 if ( is_slot_symbol( sym)) {
526 if(! (*myMin)) *myMin = iter.Value();
527 *myMax = iter.Value()+1;
528 }
529 }
530 }
531
532 /* SLOT_COUNTER_OFFSET
533 The cfst is used to turn phonemes into acoustic models, but we're using
534 special phonemes for the slots, and must here add those as pass through
535 in the Cfst, meaning that the slot marker must be unchanged after
536 composition. To do that we find the places in the Cfst where silence is
537 used, and put the slot marker arcs in parallel. This also causes the
538 models before the slot to assume silence to the right, and the models after
539 the slot to assume silence to the left, both of which are reasonable */
540
FstAddSlotMarkersToCFst(fst::StdVectorFst & cfst_,int myMin,int myMax)541 ESR_ReturnCode FstAddSlotMarkersToCFst( fst::StdVectorFst& cfst_, int myMin, int myMax)
542 {
543 int num_silence_arcs_in_cfst = 0;
544 int mimicPhonemeCode = SILENCE_CODE;
545
546 fst::StdArc::StateId s = cfst_.Start();
547 if (s == fst::kNoStateId)
548 return ESR_INVALID_ARGUMENT;
549 for (fst::StateIterator< fst::StdVectorFst> siter(cfst_);
550 !siter.Done(); siter.Next()) {
551 s = siter.Value();
552
553 for(fst::MutableArcIterator<fst::StdVectorFst> aiter(&cfst_, s);
554 !aiter.Done(); aiter.Next()) {
555 fst::StdArc arc = aiter.Value();
556 if( arc.olabel == mimicPhonemeCode) {
557 num_silence_arcs_in_cfst++;
558 for(int i=myMin; i<myMax; i++)
559 cfst_.AddArc( s, fst::StdArc(SLOT_COUNTER_OFFSET+i /*model*/,
560 SLOT_COUNTER_OFFSET+i /*phoneme*/, 0.0, arc.nextstate));
561 }
562 }
563 }
564 fst::ArcSort(&cfst_, fst::StdOLabelCompare());
565 if(!num_silence_arcs_in_cfst)
566 return ESR_INVALID_ARGUMENT;
567 else
568 return ESR_SUCCESS;
569 }
570
571 /*
572 * make the graphs used by the recognition engine during the search.
573 */
574
make_openfst_graphs(GRXMLDoc * pDoc,const std::string & grxmlBasename,const char * vocabFilename,const char * cfstFilename,const char * modelmapFilename)575 ESR_ReturnCode make_openfst_graphs( GRXMLDoc* pDoc,
576 const std::string& grxmlBasename,
577 const char* vocabFilename,
578 const char* cfstFilename,
579 const char* modelmapFilename)
580 {
581 SR_Vocabulary *vocab = 0;
582 ESR_ReturnCode rc;
583
584 fst::StdVectorFst l_fst; // .L file, created from the .map and .ok
585
586 int stateSt, stateEn;
587 size_t len;
588 bool do_skip_interword_silence = false;
589 hash_map<string,int> homonym_count;
590 int word_penalty = 0;
591
592 rc = SR_VocabularyLoad(vocabFilename, &vocab);
593 if (rc != ESR_SUCCESS) {
594 cerr << "Error: " << ESR_rc2str(rc) << endl;
595 return ESR_INVALID_ARGUMENT; // goto CLEANUP;
596 }
597
598 std::string word_penalty_str;
599 if( pDoc->findMeta(std::string("word_penalty"),word_penalty_str))
600 word_penalty = atoi((const char *)word_penalty_str.c_str());
601 else {
602 rc = ESR_SessionGetInt( L("CREC.Recognizer.wordpen"), &word_penalty);
603 if(rc != ESR_SUCCESS)
604 word_penalty = DEFAULT_WB_COST;
605 word_penalty_str = DEFAULT_WB_COST_STR;
606 pDoc->setMeta( std::string("word_penalty"), word_penalty_str) ;
607 cout << "using word_penalty " << word_penalty << endl;
608 }
609
610 std::string do_skip_interword_silence_str;
611 if( pDoc->findMeta(std::string("do_skip_interword_silence"), do_skip_interword_silence_str))
612 do_skip_interword_silence = ((do_skip_interword_silence_str != "true") ? false : true);
613
614 /*-----------------------------------------------------------------*
615 * read the .map and .omap created from grxmlcompiler classes *
616 *-----------------------------------------------------------------*/
617
618 std::string omapFilename = grxmlBasename + std::string(".omap");
619 std::string imapFilename = grxmlBasename + std::string(".map");
620
621 cout << "info: reading word symbols " << imapFilename << endl;
622 fst::SymbolTable *word_syms = fst::SymbolTable::ReadText(imapFilename);
623 if(!word_syms) {
624 cerr << "error: reading word_syms" << endl;
625 return ESR_INVALID_ARGUMENT;
626 }
627 cout << "info: reading parser symbols " << omapFilename << endl;
628 fst::SymbolTable *prsr_syms = fst::SymbolTable::ReadText(omapFilename);
629 if(!prsr_syms) {
630 cerr << "error: reading prsr_syms" << endl;
631 return ESR_INVALID_ARGUMENT;
632 }
633 cout << "info: reading model symbols " << modelmapFilename << endl;
634 fst::SymbolTable *model_syms = fst::SymbolTable::ReadText(modelmapFilename);
635 if(!prsr_syms) {
636 cerr << "error: reading prsr_syms" << endl;
637 return ESR_INVALID_ARGUMENT;
638 }
639 int max_model_sym = 0;
640 /* if(1) {
641 fst::SymbolTableIterator iter( *model_syms);
642 for(iter.Reset(); !iter.Done(); iter.Next() ) max_model_sym++; */
643
644 /*-----------------------------------------------------------------*
645 * create the .L pronunciations transducer *
646 *-----------------------------------------------------------------*/
647
648 // Adds state 0 to the initially empty FST and make it the start state.
649 stateSt = l_fst.AddState();
650 stateEn = l_fst.AddState();
651 l_fst.SetStart(stateSt); // arg is state ID
652 l_fst.SetFinal(stateEn, 0.0); // 1st arg is state ID, 2nd arg weight
653 l_fst.AddArc(stateEn, fst::StdArc(EPSILON_LABEL,EPSILON_LABEL,0.0,stateSt));
654
655 int num_slots = 0;
656 fst::SymbolTableIterator iter( *word_syms);
657 for(iter.Reset(); !iter.Done(); iter.Next() ) {
658 ESR_ReturnCode rc;
659 LCHAR prons[MAX_PRONS_LENGTH];
660 const char* phrase = iter.Symbol();
661 int wordId = iter.Value();
662 bool wordId_is_silence = false;
663 bool wordId_is_slot = false;
664 /* script or scope marker, skip it */
665 /* if( is_scope_marker( phrase) || is_script_marker(phrase))
666 continue; */
667 /* epsilon */
668 if(!strcmp( phrase, SILENCE_PREFIX_WORD)
669 || !strcmp(phrase,SILENCE_SUFFIX_WORD))
670 wordId_is_silence = true;
671 else if( !strcmp( phrase, "eps") && wordId == 0)
672 continue;
673 /* rule markers */
674 else if( strstr( phrase, ".grxml@"))
675 continue;
676 /* script markers */
677 else if( phrase[0]=='_' && strspn(phrase+1,"0123456789")==strlen(phrase+1))
678 continue;
679 else if(is_slot_symbol(phrase)) {
680 cout << "SLOT>> " << phrase << endl;
681 wordId_is_slot = true;
682 num_slots++;
683 }
684
685 if(num_slots > MAX_NUM_SLOTS) {
686 std::cout << "Error: SREC may have trouble with this many slots! (" << num_slots << ")" << std::endl;
687 // return ESR_NOT_SUPPORTED;
688 }
689
690 if(wordId_is_slot) {
691 int stateP = stateSt, statePp1;
692 /* with 2 arcs, we have a better chance to merge the slot if used from
693 different parts of the grammar, see FstPushSlotLikeOLabels elsewhere */
694 statePp1 = l_fst.AddState();
695 l_fst.AddArc(stateP, fst::StdArc( wordId+SLOT_COUNTER_OFFSET, wordId, 0.0, statePp1));
696 stateP = statePp1;
697 statePp1 = l_fst.AddState();
698 l_fst.AddArc(stateP, fst::StdArc( wordId+SLOT_COUNTER_OFFSET, EPSILON_LABEL, 0.0, statePp1));
699 stateP = statePp1;
700 l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
701 } else {
702 size_t len_used;
703 LCHAR *pron = 0, *p;
704 /* word is ok, get the pron */
705 len = MAX_PRONS_LENGTH;
706 rc = SR_VocabularyGetPronunciation(vocab, phrase, prons, &len);
707 if (rc != ESR_SUCCESS) {
708 LPRINTF( "ERROR: SR_VocabularyGetPronunciation(*,%s,*,*) returned %s\n", phrase, ESR_rc2str(rc));
709 SR_VocabularyDestroy(vocab);
710 return rc;
711 }
712 for(len_used=0; len_used<len; ) {
713 pron = &prons[0]+len_used;
714 len_used += LSTRLEN(pron)+1;
715 if( *pron == 0) break;
716 int stateP = stateSt, statePp1;
717 int olabel = wordId;
718 LPRINTF("%s : %s\n", phrase, pron);
719 /* main pronunciation */
720 for(p=pron; *p; p++) {
721 statePp1 = l_fst.AddState();
722 if(*p == OPTSILENCE_CODE) {
723 l_fst.AddArc(stateP, fst::StdArc( SILENCE_CODE, olabel, 0.0, statePp1));
724 l_fst.AddArc(stateP, fst::StdArc( EPSILON_LABEL, olabel, 0.0, statePp1));
725 } else {
726 l_fst.AddArc(stateP, fst::StdArc( *p, olabel, 0.0, statePp1));
727 }
728 stateP = statePp1;
729 olabel = EPSILON_LABEL;
730 }
731 /* add epsilons if this is a homonym */
732 string pron_string = pron;
733 hash_map<string,int>::const_iterator it = homonym_count.find( pron_string);
734 if(it == homonym_count.end()) {
735 homonym_count[ pron_string] = 0;
736 } else {
737 homonym_count[ pron_string] = homonym_count[ pron_string]+1;
738 }
739 int extra_epsilons_needed = homonym_count[ pron_string] ;
740 if(wordId_is_silence) extra_epsilons_needed = 0;
741 for(int i=0;i<extra_epsilons_needed;i++) {
742 statePp1 = l_fst.AddState();
743 l_fst.AddArc(stateP, fst::StdArc( EXTRA_EPSILON_LABEL, olabel, 0.0, statePp1));
744 stateP = statePp1;
745 }
746 /* add optional silence after each word */
747 if(!do_skip_interword_silence && !wordId_is_silence && !wordId_is_slot) {
748 statePp1 = l_fst.AddState();
749 l_fst.AddArc(stateP, fst::StdArc( SILENCE_CODE, EPSILON_LABEL, 0.0, statePp1));
750 l_fst.AddArc(statePp1, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
751 l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
752 } else if(wordId_is_silence && !strcmp(phrase, SILENCE_SUFFIX_WORD)) {
753 /* SILENCE_SUFFIX_WORD does not need a terminal .wb */
754 l_fst.AddArc(stateP, fst::StdArc( EPSILON_LABEL, EPSILON_LABEL, 0.0, stateEn));
755 } else {
756 l_fst.AddArc(stateP, fst::StdArc( WORD_BOUNDARY, EPSILON_LABEL, 0.0, stateEn));
757 }
758 } // loop over multiple prons
759 } // slot vs non-slot
760 } /* .map (word_syms) iterator */
761
762 std::string lfstFilename = grxmlBasename + ".L";
763 // We can save this FST to a file with:
764 if(debug) l_fst.Write(lfstFilename.c_str());
765
766 /*-----------------------------------------------------------------*
767 * read the .P.txt created from grxmlcompiler classes *
768 *-----------------------------------------------------------------*/
769
770 std::string ptxtFilename = grxmlBasename + std::string(".P.txt");
771 std::ifstream istrm(ptxtFilename.c_str());
772 if(!istrm) {
773 cerr << "error: reading ptxtFilename" << endl;
774 return ESR_INVALID_ARGUMENT;
775 }
776
777 cout << "info: reading parser from text " << ptxtFilename << endl;
778 fst::FstReader<fst::StdArc> reader( istrm, ptxtFilename, word_syms, prsr_syms,
779 /*state_syms*/ NULL,
780 /*acceptor*/ false,
781 /*ikeep*/ false,
782 /*okeep*/ false,
783 /*nkeep*/ false);
784 // .P file, created from the .P.txt and .omap
785 const fst::StdVectorFst& p_fst = reader.Fst();
786
787 /*-----------------------------------------------------------------*
788 * make the helper FSTs *
789 *-----------------------------------------------------------------*/
790
791 cout << "info: creating helper fsts" << endl;
792 fst::StdVectorFst prefix_fst;
793 fst::StdVectorFst suffix_fst;
794 fst::StdVectorFst eps_fst;
795 // int eps_word = StrToId("eps", word_syms, "arc ilabel");
796 int pau_word = StrToId(SILENCE_PREFIX_WORD, word_syms, "arc ilabel");
797 int pau2_word = StrToId(SILENCE_SUFFIX_WORD, word_syms, "arc ilabel");
798 if(pau_word < 0 || pau2_word < 0)
799 return ESR_INVALID_ARGUMENT;
800
801 stateSt = prefix_fst.AddState();
802 stateEn = prefix_fst.AddState();
803 prefix_fst.SetStart(stateSt); // arg is state ID
804 prefix_fst.SetFinal(stateEn, 0.0); // 1st arg is state ID, 2nd arg weight
805 prefix_fst.AddArc(stateSt, fst::StdArc(pau_word, pau_word, 0.0, stateEn));
806
807 stateSt = suffix_fst.AddState();
808 stateEn = suffix_fst.AddState();
809 suffix_fst.SetStart(stateSt); // arg is state ID
810 suffix_fst.SetFinal(stateEn, 0.0); // 1st arg is state ID, 2nd arg weight
811 suffix_fst.AddArc(stateSt, fst::StdArc(pau2_word, pau2_word, 0.0, stateEn));
812
813 stateSt = eps_fst.AddState();
814 stateEn = stateSt; // stateEn = eps_fst.AddState();
815 eps_fst.SetStart(stateSt); // arg is state ID
816 eps_fst.SetFinal(stateEn, 0.0); // 1st arg is state ID, 2nd arg weight
817 // eps_fst.AddArc(stateSt, fst::StdArc(eps_word, eps_word, 0.0, stateEn));
818
819 /*-----------------------------------------------------------------*
820 * make Grev2.det.txt *
821 *-----------------------------------------------------------------*/
822 cout << "info: creating reverse g fst" << endl;
823 fst::StdVectorFst g_fst = p_fst; // this is a copy!!
824 fst::StdVectorFst grev_fst; // reversed
825 fst::StdVectorFst grev_min_fst; // eps removed and minimized
826 fst::StdVectorFst grev_det_fst;
827
828 fst::Project(&g_fst, fst::PROJECT_INPUT);
829 if(debug) g_fst.Write( grxmlBasename + ".G");
830 fst::Reverse( g_fst, &grev_fst);
831 if(debug) grev_fst.Write( grxmlBasename + ".Grev");
832 fst::RmEpsilon( &grev_fst, /*connect?*/ true );
833 if(debug) grev_fst.Write( grxmlBasename + ".Grevrme");
834 fst::Determinize(grev_fst, &grev_det_fst);
835 if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedet");
836 if(1) fst::Minimize(&grev_det_fst);
837 if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedetmin");
838 fst::Concat( &eps_fst, grev_det_fst);
839 grev_det_fst = eps_fst;
840 if(debug) grev_det_fst.Write( grxmlBasename + ".Grevrmedetmin2");
841 std::string grevFilename = grxmlBasename + std::string(".Grev2.det.txt");
842
843 cout << "info: writing reverse G fst as text " << grevFilename << endl;
844 ostream* ostrm1 = new ofstream( grevFilename.c_str(), ios_base::out);
845 fst::FstPrinter<fst::StdArc> printer1( grev_det_fst,
846 word_syms, word_syms,
847 NULL, /*acceptor?*/ true);
848 printer1.Print( ostrm1, grevFilename);
849 delete ostrm1;
850
851 /*-----------------------------------------------------------------*
852 * make PCLG.txt *
853 *-----------------------------------------------------------------*/
854
855 fst::StdVectorFst* c_fst;
856 fst::StdVectorFst lg_fst;
857 fst::StdVectorFst clg_fst;
858 fst::StdVectorFst clg_det_fst;
859
860 cout << "info: reading model fst " << cfstFilename << endl;
861 c_fst = fst::StdVectorFst::Read( cfstFilename);
862
863 int slot_olabel_min=0, slot_olabel_max=0; // [min,max) .. ie excludes max
864 get_slot_olabel_range( word_syms, &slot_olabel_min, &slot_olabel_max);
865 if(slot_olabel_max > MAX_NUM_SLOTS)
866 std::cout << "Error: SREC may have trouble with this many slots! (" << slot_olabel_max << ")" << std::endl;
867
868 /* add slot markers as if they were silence phonemes, this makes the context
869 for them as if the slot were silence, which is reasonable, although another
870 reasonable thing would be to allow all contexts. Adding the true context
871 only would add complexity and slow down word addition too much. */
872
873 rc = FstAddSlotMarkersToCFst( *c_fst, slot_olabel_min, slot_olabel_max);
874 if(rc) return rc;
875
876 fst::Concat( &g_fst, suffix_fst);
877 fst::Concat( &prefix_fst, g_fst);
878 if(debug) prefix_fst.Write( grxmlBasename + ".G2");
879 fst::ComposeOptions copts( /*connect?*/ true);
880
881 fst::ArcSort(&l_fst, fst::StdOLabelCompare());
882 fst::ArcSort(&prefix_fst, fst::StdILabelCompare());
883
884 fst::Compose(l_fst, prefix_fst, &lg_fst, copts);
885 if(debug) lg_fst.Write( grxmlBasename + ".LG");
886 fst::ArcSort(&lg_fst, fst::StdILabelCompare());
887 if(debug) lg_fst.Write( grxmlBasename + ".LG2");
888
889 fst::RmEpsilon( &lg_fst, /*connect?*/ true );
890 if(debug) lg_fst.Write( grxmlBasename + ".LGrme");
891 fst::Determinize( lg_fst, &clg_fst); // clg_fst is really lg_det_fst!
892 if(debug) clg_fst.Write( grxmlBasename + ".LGrmedet");
893 rc = FstReplaceILabel( clg_fst, EXTRA_EPSILON_LABEL, EPSILON_LABEL);
894 fst::Compose( *c_fst, clg_fst, &clg_det_fst, copts);
895 if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet");
896
897 rc = FstMergeOLabelsToILabels_GetMax( clg_det_fst, /*int&*/max_model_sym);
898 if(verbose)
899 cout << "info: merging into ilabels I=i+" << max_model_sym << "*o" << endl;
900 rc = FstMergeOLabelsToILabels( clg_det_fst, max_model_sym);
901 if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet2");
902 fst::Minimize( &clg_det_fst);
903 if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet3");
904 if(verbose)
905 cout << "info: splitting from ilabels" << endl;
906 rc = FstSplitOLabelsFromILabels( clg_det_fst, max_model_sym);
907 if(debug) clg_det_fst.Write( grxmlBasename + ".CLGrmedet4");
908
909 rc = FstPushSlotLikeOLabels( clg_det_fst, slot_olabel_min, slot_olabel_max);
910 if(rc != ESR_SUCCESS)
911 std::cout << "Error: FstPushSlotLikeOLabels() failed" << std::endl;
912 if(debug) clg_det_fst.Write( grxmlBasename + ".CLG");
913
914 std::string pclgFilename = grxmlBasename + ".PCLG.txt";
915 ostream* ostrm = new ofstream( pclgFilename.c_str(), ios_base::out);
916 fst::FstPrinter<fst::StdArc> printer( clg_det_fst,
917 model_syms, word_syms,
918 NULL, /*acceptor?*/ false);
919 printer.Print( ostrm, pclgFilename);
920 delete ostrm;
921
922 delete c_fst;
923 delete word_syms; word_syms = NULL;
924 delete prsr_syms; prsr_syms = NULL;
925 delete model_syms; model_syms = NULL;
926
927 /*-----------------------------------------------------------------*
928 * cleanup *
929 *-----------------------------------------------------------------*/
930
931 if(vocab) {
932 SR_VocabularyDestroy(vocab);
933 vocab = NULL;
934 }
935
936 return rc;
937
938 }
939
940
941