• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  parseStringTest.c  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 
21 
22 #include "pstdio.h"
23 #include "pmemory.h"
24 #include "plog.h"
25 
26 
27 #include "HashMap.h"
28 #include "SR_Grammar.h"
29 #include "SR_SemanticResult.h"
30 #include "ESR_Session.h"
31 #include "ESR_Locale.h"
32 #include "LCHAR.h"
33 
34 #include "PFileSystem.h"
35 #include "PANSIFileSystem.h"
36 
37 /* for testing RecognizerImpl.c, see below */
38 #include"buildopt.h"
39 #include"setting.h"
40 #include"srec_sizes.h"
41 #include"SR_GrammarImpl.h"
42 
/* defines */
/* capacity of the buffers that hold one input line and the tokens cut from it */
#define MAX_LINE_LENGTH 256
/* capacity of the buffer that receives a semantic value from getValue */
#define MAX_STR_LENGTH  512
/* number of semantic result holders created for each parse */
#define MAX_SEM_RESULTS   3
/* capacity of the key-pointer arrays handed to getKeyList */
#define MAX_KEYS         30
48 
/* protos */
/* Reads one test line of the form: "transcription" key "value" [PASS|FAIL]
 * from fin and writes the verdict to fout. */
ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout);
/* Reads one test line of the form: transcription : key=value;key=value
 * from fin, prefixes each key with "<rootrule>." and writes one verdict per pair to fout. */
ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout);
52 
/**
 * Command-line switches that select how Parse() checks a transcription.
 * The field order matters: main() initialises the struct positionally.
 */
typedef struct Opts
{
  /* non-zero: translate the words to word IDs and parse through
     checkParseByWordID (set by -ids and by -allids) */
  int use_parse_by_string_ids;

  /* non-zero: ignore the supplied transcription and parse every word of the
     grammar's output-label word map instead (set by -allids) */
  int do_check_all_ids;
} Opts;
60 
/**
 * Print the command-line synopsis to standard output.
 *
 * The synopsis lists every option main() accepts, including the -ids and
 * -allids switches.
 *
 * @param exename Program name shown in the synopsis (main() passes argv[0])
 * @return Always 1, so a caller can write "return usage(...)" to exit with failure
 */
int usage(LCHAR* exename)
{
  pfprintf(PSTDOUT,
           "usage: %s -base <basefilename> [-in <input file>] [-out <output file>]"
           " [-itest <testfilename>] [-ids] [-allids]\n",
           exename);
  return 1;
}
66 
/**
 * Collapse every run of consecutive ' ' characters in trans to a single
 * space, in place. Leading and trailing runs are collapsed too, not removed.
 *
 * @param trans NUL-terminated string to compact; must not be NULL
 */
void lstr_strip_multiple_spaces(LCHAR* trans)
{
  const char *in;
  char *out = trans;

  for (in = trans; *in != L('\0'); in++)
  {
    /* a space directly followed by another space is dropped, so only the
       last space of each run survives */
    if (in[0] == ' ' && in[1] == ' ')
      continue;
    *out++ = *in;
  }
  *out = L('\0');
}
75 
76 /**
77  * Display the Semantic Result
78  */
display_results(SR_SemanticResult * result,PFile * fout)79 void display_results(SR_SemanticResult *result, PFile* fout)
80 {
81   size_t i, size, len;
82   LCHAR* keys[MAX_KEYS]; /* array of pointers to strings */
83   LCHAR  value[MAX_STR_LENGTH];
84   ESR_ReturnCode rc;
85 
86   size = MAX_KEYS;
87   rc = result->getKeyList(result, (LCHAR**) & keys, &size); /* get the key list */
88   if (rc == ESR_SUCCESS)
89   {
90     for (i = 0; i < size; i++)
91     {
92       len = MAX_STR_LENGTH;
93       if ((rc = result->getValue(result, keys[i], value, &len)) == ESR_SUCCESS)
94         pfprintf(fout, "{%s : %s}\n", keys[i], value);
95       else
96         pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
97     }
98     pfprintf(fout, "--Done--\n");
99   }
100   else
101     pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
102 }
103 
Parse(SR_Grammar * grammar,LCHAR * trans,PFile * fout,Opts * opts)104 ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout, Opts* opts)
105 {
106   ESR_ReturnCode rc = ESR_SUCCESS;
107   size_t i, result_count, key_count;
108   SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
109   wordID wordIDs[32], *wordIDptr;
110   SR_GrammarImpl* pgrammar = (SR_GrammarImpl*)grammar;
111   wordmap* wmap;
112 
113   if (opts->do_check_all_ids)
114   {
115     wordID id;
116     Opts myopts;
117     memcpy(&myopts, opts, sizeof(myopts));
118     myopts.do_check_all_ids = 0;
119     wmap = pgrammar->syntax->synx->olabels;
120     /* start at word 4 because "eps, -pau- -pau2- @root */
121     for (id = 4; id < wmap->num_words; id++)
122     {
123       trans = wmap->words[id];
124       Parse(grammar, trans, fout, &myopts);
125     }
126     return 0;
127   }
128 
129   result_count = MAX_SEM_RESULTS; /* initially not greater than MAX */
130   for (i = 0; i < result_count; i++)
131     SR_SemanticResultCreate(&semanticResults[i]); /* create the result holders */
132   lstrtrim(trans);
133   /* check for multiple space separators! */
134   lstr_strip_multiple_spaces(trans);
135 
136   if (!opts->use_parse_by_string_ids)
137   {
138     rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
139   }
140   else
141   {
142     char copy_of_trans[256], *p;
143     strcpy(copy_of_trans, trans);
144     wmap = pgrammar->syntax->synx->olabels;
145     wordIDs[0] = wordIDs[1] = MAXwordID;
146     wordIDptr = &wordIDs[0];
147     for (p = strtok(copy_of_trans, " "); p; p = strtok(NULL, " "))
148     {
149       for (i = 0; i < wmap->num_words; i++)
150         if (!strcmp(wmap->words[i], p))
151         {
152           *wordIDptr++ = (wordID)i;
153           break;
154         }
155       if (i == wmap->num_words)
156       {
157         wordIDs[0] = MAXwordID;
158         break;
159       }
160     }
161     *wordIDptr++ = MAXwordID;
162 
163     /* printf("wordids:");
164        for(wordIDptr=&wordIDs[0]; *wordIDptr!=MAXwordID; wordIDptr++)
165        printf(" %d/%s", *wordIDptr, wmap->words[*wordIDptr]);
166        printf("\n"); */
167 
168     if (wordIDs[0] == MAXwordID)
169     {
170       result_count = 0;
171       rc = ESR_SUCCESS;
172     }
173     else
174     {
175       rc = pgrammar->semproc->flush(pgrammar->semproc);
176       rc = pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), trans);
177       rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
178            wordIDs, semanticResults, &result_count);
179     }
180   }
181   if (rc != ESR_SUCCESS)
182   {
183     pfprintf(fout, "error (%s)\n\n", trans);
184     return rc;
185   }
186 
187   if (result_count < 1)
188   {
189     pfprintf(fout, "no parse (%s)\n\n", trans);
190   }
191   else
192   {
193     key_count = 0xffff;
194     rc = SR_SemanticResultGetKeyCount(semanticResults[0], &key_count);
195     pfprintf(fout, "parse ok (%d results) (%s) (%d)\n", result_count, trans, key_count);
196     for (i = 0; i < result_count; i++)
197       display_results(semanticResults[i], fout);
198 
199     for (i = 0; i < MAX_SEM_RESULTS; i++)
200     {
201       rc = semanticResults[i]->destroy(semanticResults[i]);
202       if (rc != ESR_SUCCESS)
203         return rc;
204     }
205   }
206   return ESR_SUCCESS;
207 }
208 
209 /* tests the transcription against the grammar and then decided based on what was expected of the test
210 whether or not is it considered a pass or fail */
ParseTestSet(SR_Grammar * grammar,LCHAR * trans,LCHAR * key,LCHAR * ref,LCHAR * result,PFile * fout)211 ESR_ReturnCode ParseTestSet(SR_Grammar* grammar, LCHAR* trans, LCHAR* key, LCHAR* ref, LCHAR* result, PFile* fout)
212 {
213   size_t len;
214   ESR_ReturnCode rc;
215   int i, result_count;
216   SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
217   LCHAR  value[MAX_STR_LENGTH];
218 
219   result_count = MAX_SEM_RESULTS;
220   for (i = 0; i < result_count; i++)
221     SR_SemanticResultCreate(&semanticResults[i]);
222 
223   lstrtrim(trans);
224   /* check for multiple space separators! */
225   lstr_strip_multiple_spaces(trans);
226 
227   pfprintf(fout, "checking (%s) ref(%s) res(%s)\n", trans, ref, result);
228   rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
229   if (rc != ESR_SUCCESS)
230     return rc;
231 
232   /*result file will contain
233   transcription | key | reference | result | PASSESD/FAILED */
234 
235   if (result_count < 1) /*failed to parse, but this could still be a pass if you expected a failure*/
236   {
237     pfprintf(fout, "NO PARSE FOR: %s|%s|%s|  |", trans, key, ref);
238     if (strcmp("FAIL", result) == 0)
239       pfprintf(fout, "PASSED (%s)\n", trans);
240     else
241       pfprintf(fout, "FAILED (%s)\n", trans);
242   }
243   else /*parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
244   {
245     for (i = 0; i < result_count; i++)
246     {
247       len = MAX_STR_LENGTH;
248       if ((rc = semanticResults[i]->getValue(semanticResults[i], key, value, &len)) == ESR_SUCCESS)
249       {
250         pfprintf(fout, "%s|%s|%s|%s|", trans, key, ref, value);
251 
252         if (strcmp(value, ref) == 0 && strcmp("PASS", result) == 0)
253           pfprintf(fout, "PASSED\n");
254         else
255           pfprintf(fout, "FAILED\n");
256       }
257       else
258       {
259         pfprintf(fout, "ERROR: %s, while checking key='%s'\n", ESR_rc2str(rc), key);
260       }
261     }
262 
263     /*deallocate semantic results*/
264     for (i = 0; i < MAX_SEM_RESULTS; i++)
265     {
266       rc = semanticResults[i]->destroy(semanticResults[i]);
267       if (rc != ESR_SUCCESS)
268         return rc;
269     }
270   }
271   return ESR_SUCCESS;
272 }
273 
main(int argc,char ** argv)274 int main(int argc, char **argv)
275 {
276   LCHAR trans[MAX_LINE_LENGTH];
277   SR_Grammar* grammar = NULL;
278   ESR_ReturnCode rc;
279   LCHAR base[P_PATH_MAX] = L("");
280   LCHAR infilename[P_PATH_MAX] = L("");
281   LCHAR inRTfilename[P_PATH_MAX] = L("");
282   LCHAR outfilename[P_PATH_MAX] = L("");
283   PFile *fin = NULL, *fout = NULL;
284   int i;
285   LCHAR *rootrule = L("myRoot"), *p;
286   Opts opts = { 0, 0 };
287 
288   /*
289    * Initialize portable library.
290    */
291   CHKLOG(rc, PMemInit());
292 
293   fin = PSTDIN;
294   fout = PSTDOUT;
295 
296   if (argc < 3)
297   {
298     usage(argv[0]);
299     exit(EXIT_FAILURE);
300   }
301   for (i = 1; i < argc; ++i)
302   {
303     if (!LSTRCMP(argv[i], L("-base")))
304     {
305       ++i;
306       LSTRCPY(base, argv[i]);
307     }
308     else if (!LSTRCMP(argv[i], L("-in")))
309     {
310       ++i;
311       LSTRCPY(infilename, argv[i]);
312     }
313     else if (!LSTRCMP(argv[i], L("-out")))
314     {
315       ++i;
316       LSTRCPY(outfilename, argv[i]);
317     }
318     else if (!LSTRCMP(argv[i], L("-itest")))
319     {
320       ++i;
321       LSTRCPY(inRTfilename, argv[i]);
322     }
323     else if (!LSTRCMP(argv[i], L("-ids")))
324     {
325       opts.use_parse_by_string_ids = 1;
326     }
327     else if (!LSTRCMP(argv[i], L("-allids")))
328     {
329       opts.do_check_all_ids = 1;
330       opts.use_parse_by_string_ids = 1;
331     }
332     else
333       return usage(argv[0]);
334   }
335 
336   CHK(rc, PLogInit(NULL, 0));
337 
338   rc = SR_GrammarLoad(base, &grammar);
339   if (rc != ESR_SUCCESS)
340     goto CLEANUP;
341 
342   if (*outfilename)
343   {
344     if ((fout = pfopen(outfilename, "w")) == NULL)
345     {
346       pfprintf(PSTDOUT, "Could not open file: %s\n", outfilename);
347       rc = 1;
348       goto CLEANUP;
349     }
350   }
351 
352   if (opts.do_check_all_ids)
353   {
354     rc = Parse(grammar, NULL, fout, &opts);
355   }
356   else if (*infilename)
357   {
358     if (LSTRCMP(infilename, "-") == 0)
359     {
360       fin = PSTDIN;
361     }
362     else if ((fin = pfopen(infilename, "r")) == NULL)
363     {
364       pfprintf(PSTDOUT, "Could not open file: %s\n", infilename);
365       rc = 1;
366       goto CLEANUP;
367     }
368     for (;;)
369     {
370       if (pfgets(trans, MAX_LINE_LENGTH, fin) == NULL)
371       {
372         if (!pfeof(fin))
373         {
374           rc = ESR_READ_ERROR;
375           PLogError(ESR_rc2str(rc));
376         }
377         break;
378       }
379       if (trans[0] == '#') continue;
380       lstrtrim(trans);
381       /* check for multiple space separators! */
382       lstr_strip_multiple_spaces(trans);
383       pfprintf(fout, "Transcription: %s\n", trans);
384       if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
385         goto CLEANUP;
386       pfprintf(fout, "\n");
387     }
388   }
389   else if (*inRTfilename) /*using a test file*/
390   {
391     if ((fin = pfopen(inRTfilename, "r")) == NULL)
392     {
393       pfprintf(PSTDOUT, "Could not open test file: %s\n", inRTfilename);
394       rc = 1;
395       goto CLEANUP;
396     }
397 
398     /*read through the test file parsing it into the variables
399      FORMAT: "the transciption" key "value"
400     */
401     while (ESR_TRUE)
402     {
403       if (0) rc = process_single_key_line(grammar, fin, fout);
404       else  rc = process_multi_key_line(grammar, rootrule, fin, fout);
405       if (rc == ESR_READ_ERROR)
406       {
407         rc = ESR_SUCCESS;
408         break;
409       }
410     }
411   }
412   else
413   {
414     /* get some transcriptions from the user */
415     pfprintf(PSTDOUT, "\nSemantic Parser Test Program for esr (Nuance Communicaitions, 2007)\n");
416     pfprintf(PSTDOUT, "'qqq' to quit\n");
417 
418     while (ESR_TRUE)
419     {
420       pfprintf(PSTDOUT, "> ");
421 
422       if (!fgets(trans, MAX_LINE_LENGTH, PSTDIN))
423         break;
424       // remove trailing whitespace
425       for(p=&trans[0]; *p!=0 && *p!='\n' && *p!='\r'; p++) {}
426       *p=0;
427 
428       if (!LSTRCMP("qqq", trans))
429         break;
430       else
431         if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
432           goto CLEANUP;
433     }
434   }
435 CLEANUP:
436   if (fin && fin != PSTDIN)
437     pfclose(fin);
438   if (fout && fout != PSTDOUT)
439     pfclose(fout);
440   if (grammar) grammar->destroy(grammar);
441   PLogShutdown();
442 /*  PANSIFileSystemDestroy();
443   PFileSystemDestroy();*/
444   PMemShutdown();
445   return rc;
446 }
447 
process_single_key_line(SR_Grammar * grammar,PFile * fin,PFile * fout)448 ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout)
449 {
450   LCHAR* position;
451   LCHAR line[MAX_LINE_LENGTH];
452   LCHAR trans[MAX_LINE_LENGTH];
453   LCHAR key[MAX_LINE_LENGTH];
454   LCHAR refValue[MAX_LINE_LENGTH];
455   LCHAR result[MAX_LINE_LENGTH];
456   ESR_ReturnCode rc;
457 
458   position = pfgets(line, MAX_LINE_LENGTH, fin);
459   if (line[0] == '#')
460     return ESR_SUCCESS;
461   if (!strncmp(line, "__END__", 7))
462     return ESR_READ_ERROR;
463   if (position == NULL)
464   {
465     if (pfeof(fin))
466       return ESR_READ_ERROR;
467     else
468     {
469       PLogError(L("ESR_READ_ERROR"));
470       return ESR_READ_ERROR;
471     }
472   }
473 
474   //get the transcription to test
475   if ((position = strtok(line, "\"")) != NULL)
476   {
477     LSTRCPY(trans, position);
478   }
479   else
480   {
481     pfprintf(fout, "INVALID FORMAT for input line 1 \n");
482     rc = ESR_INVALID_ARGUMENT;
483     goto CLEANUP;
484   }
485 
486   //get the key (meaning)
487   if ((position = strtok(NULL, " \t")) != NULL)
488   {
489     LSTRCPY(key, position);
490   }
491   else
492   {
493     pfprintf(fout, "INVALID FORMAT for input line 2\n");
494     rc = ESR_INVALID_ARGUMENT;
495     goto CLEANUP;
496   }
497 
498   //get the expected return string
499   if ((position = strtok(NULL, "\"")) != NULL)
500   {
501     LSTRCPY(refValue, position);
502   }
503   else
504   {
505     pfprintf(fout, "INVALID FORMAT for input line 3\n");
506     rc = ESR_INVALID_ARGUMENT;
507     goto CLEANUP;
508   }
509 
510   //get the expected result PASS/FAIL
511   //there is no need to write PASS, if nothing is written PASS is assumed
512   if ((position = strtok(NULL, " \t\r\n\"")) != NULL)
513   {
514     LSTRCPY(result, position);
515 
516     if (strcmp(result, "PASS") != 0 && strcmp(result, "FAIL") != 0)
517     {
518       pfprintf(fout, "INVALID FORMAT for input line, use either PASS or FAIL\n");
519       rc = ESR_INVALID_ARGUMENT;
520       goto CLEANUP;
521     }
522 
523     if ((rc = ParseTestSet(grammar, trans, key, refValue, result, fout)) != ESR_SUCCESS)
524       goto CLEANUP;
525   }
526   else
527   {
528     if ((rc = ParseTestSet(grammar, trans, key, refValue, "PASS", fout)) != ESR_SUCCESS)
529       goto CLEANUP;
530   }
531   rc = ESR_SUCCESS;
532 CLEANUP:
533   return rc;
534 }
535 
process_multi_key_line(SR_Grammar * grammar,const LCHAR * rootrule,PFile * fin,PFile * fout)536 ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout)
537 {
538   LCHAR *position, *p;
539   LCHAR line[MAX_LINE_LENGTH];
540   LCHAR trans[MAX_LINE_LENGTH];
541   LCHAR keyvals[MAX_LINE_LENGTH];
542   ESR_ReturnCode rc;
543   SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
544   LCHAR refkey[MAX_LINE_LENGTH];
545   LCHAR refval[MAX_LINE_LENGTH], value[MAX_STR_LENGTH];
546   size_t i, j, len;
547   size_t result_count;
548 
549   position = pfgets(line, MAX_LINE_LENGTH, fin);
550   if (line[0] == '#')
551     return ESR_SUCCESS;
552   if (!strncmp(line, "__END__", 7))
553     return ESR_READ_ERROR;
554   if (position == NULL)
555   {
556     if (pfeof(fin))
557       return ESR_READ_ERROR;
558     else
559     {
560       PLogError(L("ESR_READ_ERROR"));
561       return ESR_READ_ERROR;
562     }
563   }
564 
565   /* we're trying to parse
566     Hello there : BONJOUR
567    */
568   p = strtok(line, ":");
569   LSTRCPY(trans, p);
570   /* strip trailing spaces */
571   for (len = strlen(trans); len > 0 && trans[len-1] == ' '; len--)
572     trans[len-1] = 0;
573 
574   p = strtok(NULL, "\n\r");
575   /* strip leading spaces */
576   while (*p == ' ' || *p == '\t')  p++;
577   LSTRCPY(keyvals, p);
578 
579   result_count = MAX_SEM_RESULTS;
580   for (i = 0; i < result_count; i++)
581     SR_SemanticResultCreate(&semanticResults[i]);
582 
583   /* pfprintf(fout,"checking (%s) ref(%s)\n", trans, keyvals); */
584   rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
585   if (rc != ESR_SUCCESS)
586     return rc;
587 
588   /*result file will contain
589   transcription | key | reference | result | PASSESD/FAILED */
590 
591   if (result_count < 1) /*failed to parse, but this could still be a pass if you expected a failure*/
592   {
593     pfprintf(fout, "%s|%s|  |", trans, keyvals);
594     if (!strcmp("FAIL", keyvals) || !strcmp(keyvals, "-"))
595       pfprintf(fout, "PASSED\n");
596     else
597       pfprintf(fout, "FAILED\n");
598   }
599   else /*parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
600   {
601     size_t size, len;
602     LCHAR* keys_available[MAX_KEYS]; /* array of pointers to strings */
603     size = MAX_KEYS;
604     rc = semanticResults[0]->getKeyList(semanticResults[0], (LCHAR**) & keys_available, &size);
605 
606     for (p = strtok(keyvals, ";"); p; p = strtok(NULL, ";"))
607     {
608       sprintf(refkey, "%s.%s", rootrule, p);
609       p = strchr(refkey, '=');
610       assert(p);
611       *p = 0;
612       p++;
613       if (*p == '\'') p++;
614       LSTRCPY(refval, p);
615       if (refval[ strlen(refval)-1] == '\'') refval[strlen(refval)-1] = 0;
616 
617       for (i = 0; i < result_count; i++)
618       {
619         len = MAX_STR_LENGTH;
620         for (j = 0; j < size; j++)
621           if (!strcmp(keys_available[j], refkey)) break;
622         if (j < size)
623           rc = semanticResults[i]->getValue(semanticResults[i], refkey, value, &len);
624         else
625         {
626           LSTRCPY(value, "<NOSUCHKEY>");
627           rc = ESR_NO_MATCH_ERROR;
628         }
629         pfprintf(fout, "%s|%s|%s|%s|", trans, refkey, refval, value);
630         if (strcmp(value, refval) == 0)
631           pfprintf(fout, "PASSED\n");
632         else
633           pfprintf(fout, "FAILED\n");
634       }
635     }
636 
637     /*deallocate semantic results*/
638     for (i = 0; i < MAX_SEM_RESULTS; i++)
639     {
640       rc = semanticResults[i]->destroy(semanticResults[i]);
641       if (rc != ESR_SUCCESS)
642         PLogError("%s while destroying", ESR_rc2str(rc));
643     }
644   }
645   return ESR_SUCCESS;
646 }
647 
648