• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  test_g2g.c  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 
21 
22 #include "pstdio.h"
23 #include "pmemory.h"
24 #include "plog.h"
25 #include "HashMap.h"
26 #include "SR_Grammar.h"
27 #include "SR_Vocabulary.h"
28 #include "SR_SemanticResult.h"
29 #include "ESR_Session.h"
30 #include "ESR_Locale.h"
31 #include "ESR_CommandLine.h"
32 #include "LCHAR.h"
33 
34 #include "PFileSystem.h"
35 #include "PANSIFileSystem.h"
36 
37 #include "SR_GrammarImpl.h"
38 
39 #include "simapi.h"
40 #include "srec_context.h"
41 #include "srec_arb.h"
42 
/**
 * Summary of one phoneme model tree, as accumulated by traverse_tree().
 *
 * In this file only nnodes and the low/high pel and genone bounds are ever
 * written; the remaining fields are not touched here.
 */
typedef struct
{
  unsigned short nnodes;          /* bumped once for every tree node visited */
  unsigned long  size;
  long           phoneme;
  unsigned short node_pos;
  unsigned long  node_off;
  short          low_genone_no;   /* always assigned together with low_pel_no */
  short          high_genone_no;  /* always assigned together with high_pel_no */
  short          low_pel_no;      /* smallest terminal pelid seen so far */
  short          high_pel_no;     /* largest terminal pelid seen so far */
} tree_head;
59 
60 
/**
 * Writes the command-line help text to PSTDOUT.
 *
 * @param exename Program name substituted into the first help line
 * @return Always 1
 */
int usage(LCHAR* exename)
{
  /* One call, same bytes: the four help lines are emitted back to back. */
  pfprintf(PSTDOUT,
           "usage: %s -base <basefilename> \n"
           "<basefilename> can be a file.g2g or @g2gfilelist\n"
           "[-checkword id] .. also checks word id in the file\n"
           "[-swiarb esr/config/lang/models/generic.swiarb] ... enables word check\n",
           exename);
  return 1;
}
69 
70 /* protos */
71 ESR_ReturnCode find_phonemes_for_ihmms( CA_Arbdata* ca_arbdata, modelID* ihmms, int num_hmms);
72 ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout);
73 int CheckG2G(CA_Arbdata* arbdata, int* p4pTable, const char* base, int wordid, char* outbase);
74 void load_filelist(char* filelist, char*** pfiles, int *pnum_files);
75 int *phonemecode_for_pel_table(CA_Arbdata* arbdata);
76 
77 int debug = 0;
78 #define MAX_LINE_LENGTH 256
79 #define MAX_STR_LENGTH   80
80 #define MAX_SEM_RESULTS   3
81 #define MAX_KEYS         30
82 
83 /* main */
84 
main(int argc,char ** argv)85 int main (int argc, char **argv)
86 {
87   ESR_ReturnCode rc;
88   LCHAR base[P_PATH_MAX] = L("");
89   int i;
90   CA_Arbdata* ca_arbdata;
91   char*  arbfile = NULL;
92   char** g2glist;
93   int g2glist_len;
94   char* outbase = NULL;
95   int *p4pTable;
96   int wordid = 0;
97   int log_level = 0;
98 
99   /*
100    * Initialize portable library.
101    */
102   CHKLOG(rc, PMemInit());
103 /*  CHKLOG(rc, PFileSystemCreate());
104   CHKLOG(rc, PANSIFileSystemCreate());
105   CHKLOG(rc, PANSIFileSystemAddPath(L("/dev/ansi"), L("/")));*/
106 
107   /* Set ANSI file-system as default file-system */
108 /*  CHKLOG(rc, PANSIFileSystemSetDefault(ESR_TRUE));*/
109   /* Set virtual current working directory to native current working directory */
110 /*  len = P_PATH_MAX;
111   CHKLOG(rc, PANSIFileSystemGetcwd(cwd, &len));
112   CHKLOG(rc, PFileSystemChdir(cwd));*/
113 
114   if( argc <= 1)
115 	{
116     usage(argv[0]);
117     exit(EXIT_FAILURE);
118   }
119 
120 	for (i = 1; i < argc; ++i)
121 	{
122 		if(!LSTRCMP(argv[i], L("-base")))
123 		{
124 			++i;
125 			LSTRCPY(base, argv[i]);
126 		}
127 		else if(!LSTRCMP(argv[i],L("-out")))
128 		{
129 			outbase = argv[++i];
130 		}
131 		else if(!LSTRCMP(argv[i],L("-swiarb")))
132 		{
133 			arbfile = argv[++i];
134 		}
135 		else if(!LSTRCMP(argv[i],L("-checkword")))
136 		{
137 			wordid = atoi(argv[++i]);
138 		}
139 		else if(!LSTRCMP(argv[i],L("-log")))
140 		{
141 			log_level = 10;
142 		}
143 		else
144 		{
145 			printf("unrecog'd argument %s\n", argv[i]);
146 			exit(1);
147 		}
148 	}
149 
150 	CHK(rc, PLogInit(NULL, log_level));
151 
152 	if(arbfile) {
153 		ca_arbdata = CA_LoadArbdata(arbfile);
154 		if(!ca_arbdata) {
155       pfprintf(PSTDOUT, "Error: loading arbfile %s\n", arbfile);
156       goto CLEANUP;
157     }
158     pfprintf(PSTDOUT, "arbdata done\n");
159     p4pTable  = phonemecode_for_pel_table(ca_arbdata);
160     pfprintf(PSTDOUT, "p4pTable done\n");
161   } else {
162     ca_arbdata = 0;
163     p4pTable = 0;
164   }
165 
166   if(base[0] == '@') {
167     load_filelist(base+1, &g2glist, &g2glist_len);
168     pfprintf(PSTDOUT, "g2glist %s .. %d entries\n", g2glist_len);
169     for(i=0; i<g2glist_len; i++)
170       CheckG2G( ca_arbdata, p4pTable, g2glist[i], wordid, outbase);
171   }
172   else {
173     CheckG2G( ca_arbdata, p4pTable, base, wordid, outbase);
174   }
175 
176 CLEANUP:
177   PLogShutdown();
178 /*  PANSIFileSystemDestroy();
179   PFileSystemDestroy();*/
180   PMemSetLogFile(PSTDOUT);
181   PMemDumpLogFile();
182   PMemShutdown();
183   return rc;
184 }
185 
/**
 * Loads one grammar image, prints a one-line summary of its FST, and, when a
 * word id and arbdata are supplied, checks the first arc carrying that word.
 *
 * The word check collects the input labels around the arc (walking backwards
 * to the start node or a WORD_BOUNDARY, and forwards to the end node or a
 * WORD_BOUNDARY), converts them to phoneme codes, prints them, and finally
 * runs Parse() on the word's text.
 *
 * @param ca_arbdata Arbdata used for the phoneme lookup; 0 skips the word check
 * @param p4pTable   Optional pel -> phoneme code table; when NULL the phoneme
 *                   trees in ca_arbdata are searched instead
 * @param base       Path of the grammar image
 * @param wordid     Output label to check; 0 skips the word check
 * @param outbase    When non-NULL, the syntax is dumped with CA_DumpSyntax()
 * @return 0 when the arc scan ran to completion (check passed, or no arc
 *         carries wordid); 1 otherwise, including the case where the word
 *         check was skipped
 */
int CheckG2G(CA_Arbdata* ca_arbdata, int* p4pTable, const char* base, int wordid, char* outbase)
{
  enum { MAX_CONTEXT_ILABELS = 64, MAX_WORD_ILABELS = 128 };
  ESR_ReturnCode rc;
  SR_GrammarImpl *grammarImpl;
  SR_Grammar* grammar = NULL;
  srec_context* fst;
  CA_Syntax* syntax;
  modelID ilabels_preceding[MAX_CONTEXT_ILABELS];
  modelID ilabels_following[MAX_CONTEXT_ILABELS];
  modelID ilabels[MAX_WORD_ILABELS];
  int num_ilabels_preceding;
  int num_ilabels_following;
  int num_ilabels;
  int i,j;
  int status = 1;
  unsigned long g2gsize = 0;
  long file_len;
  FILE* fp;

  /* File size is informational only; it stays 0 when it cannot be determined. */
  fp = fopen(base, "rb");
  if(fp) {
    if(fseek(fp, 0, SEEK_END) == 0) {
      file_len = ftell(fp);
      if(file_len > 0)
        g2gsize = (unsigned long)file_len;
    }
    fclose(fp);
  }

  rc = SR_GrammarLoad(base, &grammar);
  if(rc != ESR_SUCCESS) {
    pfprintf(PSTDOUT, "%s failed at load\n", base);
    goto CLEANUP;
  }

  grammarImpl = (SR_GrammarImpl*)grammar;
  syntax = grammarImpl->syntax;
  if(outbase) {
    CA_DumpSyntax( syntax, outbase);
  }

  fst = syntax->synx;
  /* %lu for the unsigned long size; the pointer difference is narrowed to int */
  pfprintf(PSTDOUT, "%s %lu arcs %d/%d/%d nodes %d/%d/%d words %d/%d chars %d/%d modelver %d\n",
           base, g2gsize,
           fst->num_arcs, fst->num_base_arcs, fst->FSMarc_list_len,
           fst->num_nodes, fst->num_base_nodes, fst->FSMnode_list_len,
           fst->olabels->num_words, fst->olabels->max_words,
           (int)(fst->olabels->next_chars-fst->olabels->chars),
           fst->olabels->max_chars,
#ifdef IMAGE_FORMAT_V2
           fst->modelid
#else
           -1
#endif
           );

  if(wordid == 0 || ca_arbdata == 0)
    goto CLEANUP;

  if(wordid >= fst->olabels->num_words) {
    pfprintf(PSTDOUT, "%s failed 'cuz numwords(%d) < %d\n", base,
             fst->olabels->num_words, wordid);
    goto CLEANUP;
  }

  for(i=0; i<fst->num_arcs; i++) {
    FSMnode* node;
    FSMarc* arc;
    nodeID fr_node;
    arcID iarc;

    if(fst->FSMarc_list[i].olabel != wordid)
      continue;

    arc = &fst->FSMarc_list[i];
    fr_node = arc->fr_node;
    ilabels_following[0] = arc->ilabel;
    num_ilabels_following = 1;
    num_ilabels_preceding = 0;

    /* Walk backwards, collecting input labels in reverse order. */
    for( ; fr_node!=fst->start_node; fr_node=arc->fr_node) {
      node = &fst->FSMnode_list[fr_node];
      iarc = node->first_prev_arc;
      for( ; iarc!=MAXarcID; iarc=arc->linkl_prev_arc) {
        arc = &fst->FSMarc_list[iarc];
        if(arc->fr_node != fr_node) break;
      }
      if(iarc == MAXarcID) {
        pfprintf(PSTDOUT, "%s failed at 11\n", base);
        goto CLEANUP;
      }
      if(arc->ilabel == WORD_BOUNDARY) break;
      if(num_ilabels_preceding >= MAX_CONTEXT_ILABELS) {
        pfprintf(PSTDOUT, "%s failed: more than %d preceding ilabels\n", base,
                 (int)MAX_CONTEXT_ILABELS);
        goto CLEANUP;
      }
      ilabels_preceding[num_ilabels_preceding++] = arc->ilabel;
    }

    /* Walk forwards from the word arc; a trailing WORD_BOUNDARY is kept here
       and dropped below. */
    arc = &fst->FSMarc_list[i];
    fr_node = arc->to_node;
    for( ; fr_node!=fst->end_node; fr_node=arc->to_node) {
      node = &fst->FSMnode_list[fr_node];
      iarc = node->un_ptr.first_next_arc;
      for( ; iarc!=MAXarcID; iarc=arc->linkl_next_arc) {
        arc = &fst->FSMarc_list[iarc];
        if(arc->to_node != fr_node) break;
      }
      if(iarc == MAXarcID) {
        pfprintf(PSTDOUT, "%s failed at 12\n", base);
        goto CLEANUP;
      }
      if(num_ilabels_following >= MAX_CONTEXT_ILABELS) {
        pfprintf(PSTDOUT, "%s failed: more than %d following ilabels\n", base,
                 (int)MAX_CONTEXT_ILABELS);
        goto CLEANUP;
      }
      ilabels_following[num_ilabels_following++] = arc->ilabel;
      if(arc->ilabel == WORD_BOUNDARY) break;
    }

    /* Join: preceding labels (un-reversed) followed by the following labels.
       Both parts are capped at MAX_CONTEXT_ILABELS, so ilabels cannot overflow. */
    num_ilabels = 0;
    for(j=0; j<num_ilabels_preceding; j++)
      ilabels[num_ilabels++] = ilabels_preceding[num_ilabels_preceding-1-j];
    for(j=0; j<num_ilabels_following; j++)
      ilabels[num_ilabels++] = ilabels_following[j];
    if(ilabels[num_ilabels-1] == WORD_BOUNDARY)
      num_ilabels--;

    /* Every remaining label must be at or above the hmm offset; rebase it. */
    for(j=0; j<num_ilabels; j++) {
      if(ilabels[j]<fst->hmm_ilabel_offset) {
        pfprintf(PSTDOUT, "%s failed at 15\n", base);
        goto CLEANUP;
      }
      ilabels[j] = ilabels[j] - (labelID)fst->hmm_ilabel_offset;
    }

    pfprintf(PSTDOUT, "%s (W%d) ihmms ", fst->olabels->words[wordid], wordid);
    for(j=0;j<num_ilabels;j++)
      pfprintf(PSTDOUT, " %d", ilabels[j]);
    pfprintf(PSTDOUT, "\n");
    if(num_ilabels < 2) {
      pfprintf(PSTDOUT, "%s failed at 1\n", base);
      goto CLEANUP;
    }

    /* Use the lookup table when one was supplied; otherwise search the
       phoneme trees. (The table must never be indexed when it is NULL.) */
    if(!p4pTable)
      rc = find_phonemes_for_ihmms( ca_arbdata, ilabels, num_ilabels);
    else {
      rc = ESR_SUCCESS;
      for(j=0; j<num_ilabels; j++) {
        /* NOTE(review): the table length is not known here, so ilabels[j]
           cannot be range-checked against it. */
        if(p4pTable[ ilabels[j]]<0) {
          rc = ESR_NO_MATCH_ERROR;
          ilabels[j] = MAXmodelID;
        } else {
          ilabels[j] = (modelID)p4pTable[ ilabels[j]];
        }
      }
    }

    if(rc) {
      pfprintf(PSTDOUT, "%s failed at 2\n", base);
      goto CLEANUP;
    }
    pfprintf(PSTDOUT, "%s ", fst->olabels->words[wordid]);
    for(j=0;j<num_ilabels;j++) pfprintf(PSTDOUT, "%c", ilabels[j]);
    pfprintf(PSTDOUT, "\n");
    rc = Parse( grammar, fst->olabels->words[wordid], PSTDOUT);
    if(rc) {
      pfprintf(PSTDOUT, "%s failed at 3\n", base);
      goto CLEANUP;
    }
    pfprintf(PSTDOUT, "%s PASSED (on %s)\n", base, fst->olabels->words[wordid]);
    break; /* only the first arc carrying wordid is checked */
  }

  status = 0;

 CLEANUP:
  /* Single exit so the grammar is released on the success path as well. */
  if(grammar) SR_GrammarDestroy(grammar);
  return status;
}
344 
345 
/**
 * Recursively walks a model tree, counting nodes and tracking the range of
 * terminal pel ids.
 *
 * A node with a negative quest_index is treated as terminal: it bumps
 * *num_terminal_nodes (when that pointer is given) and widens the low/high
 * pel and genone bounds in tree_topo. Any other node recurses into its fail
 * and pass children.
 *
 * @param node               Subtree root; NULL is ignored
 * @param tree_topo          Accumulator; the caller initialises nnodes and
 *                           the low/high bounds before the first call
 * @param num_terminal_nodes Optional terminal-node counter, may be NULL
 * @return Always 0
 */
int traverse_tree(tree_node* node, tree_head *tree_topo, int *num_terminal_nodes)
{
  /* A missing child contributes nothing and must not be dereferenced. */
  if(!node)
    return 0;

  tree_topo->nnodes++;

  if(node->node.quest_index < 0) {
    if(num_terminal_nodes)
      (*num_terminal_nodes)++;
    if( node->term.pelid < tree_topo->low_pel_no)
      tree_topo->low_pel_no = tree_topo->low_genone_no = node->term.pelid;
    if( node->term.pelid > tree_topo->high_pel_no)
      tree_topo->high_pel_no = tree_topo->high_genone_no = node->term.pelid;
  } else {
    traverse_tree( (tree_node*)node->node.fail, tree_topo, num_terminal_nodes);
    traverse_tree( (tree_node*)node->node.pass, tree_topo, num_terminal_nodes);
  }
  return 0;
}
365 
/**
 * Counts the nodes of a model tree.
 *
 * @param node               Root of the tree
 * @param num_terminal_nodes Receives the number of terminal nodes
 * @return Total number of nodes visited by traverse_tree()
 */
int num_nodes_in_tree(tree_node* node, int *num_terminal_nodes)
{
  tree_head topo;

  *num_terminal_nodes = 0;
  topo.nnodes = 0;
  /* traverse_tree() compares against these bounds, so they must not be left
     uninitialised; same starting values as the other callers in this file. */
  topo.low_pel_no  = 32567;
  topo.high_pel_no = 0;
  traverse_tree(node, &topo, num_terminal_nodes);
  return topo.nnodes;
}
374 
find_phonemes_for_ihmms(CA_Arbdata * ca_arbdata,modelID * ihmms,int num_ihmms)375 ESR_ReturnCode find_phonemes_for_ihmms( CA_Arbdata* ca_arbdata, modelID* ihmms, int num_ihmms)
376 {
377   int ii, i;
378   int num_hmms_in_phoneme;
379   tree_head topo;
380   srec_arbdata* a = (srec_arbdata*)ca_arbdata;
381   int num_phonemes_for_ihmms = 0;
382 
383   for(ii=0; ii<num_ihmms; ii++) {
384     for(i=0; i<a->num_phonemes; i++) {
385       num_hmms_in_phoneme = 0;
386       topo.low_pel_no  = 32567;
387       topo.high_pel_no = 0;
388       traverse_tree(a->pdata[i].model_nodes, &topo, &num_hmms_in_phoneme);
389       if(debug)printf("phoneme %d num_hmms %d (%d-%d)\n", i, num_hmms_in_phoneme,
390 		      topo.low_pel_no, topo.high_pel_no);
391       if(ihmms[ii] >= topo.low_pel_no && ihmms[ii]<= topo.high_pel_no) {
392 	ihmms[ii] = (modelID)i;
393 	num_phonemes_for_ihmms++;
394 	break;
395       }
396     }
397     if( i==a->num_phonemes) {
398       if(ihmms[ii]<=5) {
399 	ihmms[ii] = 0;
400 	num_phonemes_for_ihmms++;
401       } else {
402 	PLogError("error: could not find hmm%d under any phoneme! ",ihmms[ii]);
403       }
404     }
405 
406   }
407   if(num_phonemes_for_ihmms != num_ihmms)
408     return ESR_INVALID_ARGUMENT;
409   else {
410     for(ii=0; ii<num_ihmms; ii++) ihmms[ii] =  a->pdata[ ihmms[ii]].code;
411     return ESR_SUCCESS;
412   }
413 }
414 
/**
 * Prints every key/value pair of one semantic result as "{key : value}".
 *
 * A failure to fetch the key list, or to fetch an individual value, is
 * reported as an "Error: ..." line on the same stream.
 *
 * @param result Semantic result to print
 * @param fout   Output stream
 */
void display_results(SR_SemanticResult *result, PFile* fout)
{
  LCHAR* keys[MAX_KEYS];       /* filled in by getKeyList */
  LCHAR  value[MAX_STR_LENGTH];
  size_t key_count = MAX_KEYS; /* in: capacity of keys */
  size_t value_len;
  size_t k;
  ESR_ReturnCode rc;

  rc = result->getKeyList(result, (LCHAR**) &keys, &key_count);
  if(rc != ESR_SUCCESS)
  {
    pfprintf(fout,"Error: %s\n",ESR_rc2str(rc));
    return;
  }

  for(k = 0; k < key_count; k++)
  {
    value_len = MAX_STR_LENGTH; /* reset to the buffer capacity before each call */
    rc = result->getValue(result,keys[k],value,&value_len);
    if(rc != ESR_SUCCESS)
    {
      pfprintf(fout,"Error: %s\n",ESR_rc2str(rc));
      continue;
    }
    pfprintf(fout,"{%s : %s}\n",keys[k],value);
  }
}
438 
Parse(SR_Grammar * grammar,LCHAR * trans,PFile * fout)439 ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout)
440 {
441   ESR_ReturnCode rc;
442   int i, result_count;
443   SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
444 
445   result_count = MAX_SEM_RESULTS; /* initially not greater than MAX */
446   for(i =0; i<result_count; i++)
447     SR_SemanticResultCreate(&semanticResults[i]); /* create the result holders */
448 
449   lstrtrim(trans);
450 
451   rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) &result_count);
452   if(rc != ESR_SUCCESS)
453     return rc;
454 
455   if(result_count < 1)
456   {
457     pfprintf(fout,"no parse\n\n");
458     return ESR_NO_MATCH_ERROR;
459   }
460   else
461   {
462     pfprintf(fout,"parse ok (%d results)\n", result_count);
463     for(i=0; i < result_count; i++)
464       display_results(semanticResults[i],fout);
465 
466     for(i=0; i < MAX_SEM_RESULTS; i++)
467     {
468       rc = semanticResults[i]->destroy(semanticResults[i]);
469       if(rc != ESR_SUCCESS)
470         return rc;
471     }
472     return ESR_SUCCESS;
473   }
474 }
475 
load_filelist(char * filelist,char *** pfiles,int * pnum_files)476 void load_filelist(char* filelist, char*** pfiles, int *pnum_files)
477 {
478   int i = 0;
479   FILE* fp;
480   char line[512];
481   char **files = 0, *file;
482   int num_files = 0;
483 
484   fp = fopen(filelist, "r");
485   if(!fp) {
486     pfprintf(PSTDOUT, "failed to open %s\n", filelist);
487     goto DONE;
488   }
489 
490   while( fgets(line, sizeof(line), fp)) {
491     if(line[0] == '#') continue;
492     i++;
493   }
494   fclose(fp);
495 
496   num_files = i;
497   *files = CALLOC( num_files, sizeof(char*), __FILE__);
498   fp = fopen(filelist, "r");
499   for(i=0; fgets(line,sizeof(line),fp) && i<num_files; i++) {
500     if(line[0] == '#') continue;
501     strtok(line,"\n\r\t");
502     file = files[i++] = CALLOC(strlen(line)+1,sizeof(char),__FILE__);
503     strcpy( file, line);
504   }
505   fclose(fp);
506   num_files = i;
507 
508  DONE:
509   *pfiles = files;
510   *pnum_files = num_files;
511 }
512 
/**
 * Builds a table that maps a pel id to the code of the phoneme whose tree
 * covers it.
 *
 * Each phoneme's tree is traversed to find its lowest and highest terminal
 * pel id, and every table slot in that range is set to the phoneme's code.
 * Slots that no phoneme covers stay 0. If ranges overlap, the later phoneme
 * overwrites the earlier one.
 *
 * @param ca_arbdata Arbdata, accessed as srec_arbdata
 * @return Pointer to a static table of 2048 ints; it is rebuilt on each call
 */
int* phonemecode_for_pel_table(CA_Arbdata* ca_arbdata)
{
  static int table[2048];
  const int table_len = (int)(sizeof(table)/sizeof(table[0]));
  int i,j;
  int first, last;
  tree_head topo;
  srec_arbdata* a = (srec_arbdata*)ca_arbdata;
  int num_hmms_in_phoneme;

  for(j=0; j<table_len; j++)
    table[j] = 0;

  for(i=0; i<a->num_phonemes; i++) {
    num_hmms_in_phoneme = 0;
    topo.nnodes      = 0; /* incremented by traverse_tree(), so give it a defined start */
    topo.low_pel_no  = 32567;
    topo.high_pel_no = 0;
    traverse_tree(a->pdata[i].model_nodes, &topo, &num_hmms_in_phoneme);
    if(debug)printf("phoneme %d num_hmms %d (%d-%d)\n", i, num_hmms_in_phoneme,
                    topo.low_pel_no, topo.high_pel_no);

    /* Clamp the range to the table so an unexpected pel id cannot write
       outside it. */
    first = topo.low_pel_no;
    last  = topo.high_pel_no;
    if(first < 0)
      first = 0;
    if(last >= table_len) {
      PLogError("error: pel range %d-%d of phoneme %d exceeds table size %d",
                topo.low_pel_no, topo.high_pel_no, i, table_len);
      last = table_len - 1;
    }

    for(j=first; j<=last; j++)
      table[j] = a->pdata[i].code;
  }
  return &table[0];
}
537