1 /*---------------------------------------------------------------------------*
2 * parseStringTest.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20
21
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "pstdio.h"
#include "pmemory.h"
#include "plog.h"


#include "HashMap.h"
#include "SR_Grammar.h"
#include "SR_SemanticResult.h"
#include "ESR_Session.h"
#include "ESR_Locale.h"
#include "LCHAR.h"

#include "PFileSystem.h"
#include "PANSIFileSystem.h"

/* for testing RecognizerImpl.c, see below */
#include "buildopt.h"
#include "setting.h"
#include "srec_sizes.h"
#include "SR_GrammarImpl.h"
42
/* Buffer and array capacities used throughout this test program. */
#define MAX_LINE_LENGTH  256  /* characters per input line / transcription buffer */
#define MAX_STR_LENGTH   512  /* characters per semantic value buffer */
#define MAX_SEM_RESULTS  3    /* semantic result holders allocated per parse */
#define MAX_KEYS         30   /* capacity of the key-list arrays */
48
49 /* protos */
50 ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout);
51 ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout);
52
/* Switches that select how Parse() processes a transcription. */
typedef struct Opts
{
  /* non-zero: look each word up in the grammar's word map and parse by word ID */
  int use_parse_by_string_ids;
  /* non-zero: ignore the supplied transcription and parse every word-map entry */
  int do_check_all_ids;
} Opts;
60
/**
 * Print the command-line synopsis to standard output.
 *
 * The synopsis lists every switch that main() accepts, including the
 * value-less -ids and -allids switches.
 *
 * @param exename Program name shown in the synopsis (normally argv[0])
 * @return Always 1, so callers can write "return usage(argv[0]);"
 */
int usage(LCHAR* exename)
{
  pfprintf(PSTDOUT, "usage: %s -base <basefilename> [-in <input file>] [-out <output file>] [-itest <testfilename>] [-ids] [-allids]\n", exename);
  return 1;
}
66
/**
 * Collapse every run of consecutive space characters to a single space,
 * in place. Only ' ' is treated as a separator (tabs are left alone), and
 * a single leading or trailing space is kept.
 *
 * @param trans NUL-terminated string to rewrite; NULL is ignored
 */
void lstr_strip_multiple_spaces(LCHAR* trans)
{
  LCHAR *src, *dst;

  if (trans == NULL)
    return;

  /* Each character is copied to *dst first; dst only advances when the
     character must be kept, i.e. it is not a space, or it is the last
     space of its run. The terminator is copied by the loop condition. */
  for (src = dst = trans; (*dst = *src) != L('\0'); src++)
  {
    if (*dst != L(' ') || src[1] != L(' '))
      dst++;
  }
}
75
76 /**
77 * Display the Semantic Result
78 */
display_results(SR_SemanticResult * result,PFile * fout)79 void display_results(SR_SemanticResult *result, PFile* fout)
80 {
81 size_t i, size, len;
82 LCHAR* keys[MAX_KEYS]; /* array of pointers to strings */
83 LCHAR value[MAX_STR_LENGTH];
84 ESR_ReturnCode rc;
85
86 size = MAX_KEYS;
87 rc = result->getKeyList(result, (LCHAR**) & keys, &size); /* get the key list */
88 if (rc == ESR_SUCCESS)
89 {
90 for (i = 0; i < size; i++)
91 {
92 len = MAX_STR_LENGTH;
93 if ((rc = result->getValue(result, keys[i], value, &len)) == ESR_SUCCESS)
94 pfprintf(fout, "{%s : %s}\n", keys[i], value);
95 else
96 pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
97 }
98 pfprintf(fout, "--Done--\n");
99 }
100 else
101 pfprintf(fout, "Error: %s\n", ESR_rc2str(rc));
102 }
103
Parse(SR_Grammar * grammar,LCHAR * trans,PFile * fout,Opts * opts)104 ESR_ReturnCode Parse(SR_Grammar* grammar, LCHAR* trans, PFile* fout, Opts* opts)
105 {
106 ESR_ReturnCode rc = ESR_SUCCESS;
107 size_t i, result_count, key_count;
108 SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
109 wordID wordIDs[32], *wordIDptr;
110 SR_GrammarImpl* pgrammar = (SR_GrammarImpl*)grammar;
111 wordmap* wmap;
112
113 if (opts->do_check_all_ids)
114 {
115 wordID id;
116 Opts myopts;
117 memcpy(&myopts, opts, sizeof(myopts));
118 myopts.do_check_all_ids = 0;
119 wmap = pgrammar->syntax->synx->olabels;
120 /* start at word 4 because "eps, -pau- -pau2- @root */
121 for (id = 4; id < wmap->num_words; id++)
122 {
123 trans = wmap->words[id];
124 Parse(grammar, trans, fout, &myopts);
125 }
126 return 0;
127 }
128
129 result_count = MAX_SEM_RESULTS; /* initially not greater than MAX */
130 for (i = 0; i < result_count; i++)
131 SR_SemanticResultCreate(&semanticResults[i]); /* create the result holders */
132 lstrtrim(trans);
133 /* check for multiple space separators! */
134 lstr_strip_multiple_spaces(trans);
135
136 if (!opts->use_parse_by_string_ids)
137 {
138 rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
139 }
140 else
141 {
142 char copy_of_trans[256], *p;
143 strcpy(copy_of_trans, trans);
144 wmap = pgrammar->syntax->synx->olabels;
145 wordIDs[0] = wordIDs[1] = MAXwordID;
146 wordIDptr = &wordIDs[0];
147 for (p = strtok(copy_of_trans, " "); p; p = strtok(NULL, " "))
148 {
149 for (i = 0; i < wmap->num_words; i++)
150 if (!strcmp(wmap->words[i], p))
151 {
152 *wordIDptr++ = (wordID)i;
153 break;
154 }
155 if (i == wmap->num_words)
156 {
157 wordIDs[0] = MAXwordID;
158 break;
159 }
160 }
161 *wordIDptr++ = MAXwordID;
162
163 /* printf("wordids:");
164 for(wordIDptr=&wordIDs[0]; *wordIDptr!=MAXwordID; wordIDptr++)
165 printf(" %d/%s", *wordIDptr, wmap->words[*wordIDptr]);
166 printf("\n"); */
167
168 if (wordIDs[0] == MAXwordID)
169 {
170 result_count = 0;
171 rc = ESR_SUCCESS;
172 }
173 else
174 {
175 rc = pgrammar->semproc->flush(pgrammar->semproc);
176 rc = pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), trans);
177 rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
178 wordIDs, semanticResults, &result_count);
179 }
180 }
181 if (rc != ESR_SUCCESS)
182 {
183 pfprintf(fout, "error (%s)\n\n", trans);
184 return rc;
185 }
186
187 if (result_count < 1)
188 {
189 pfprintf(fout, "no parse (%s)\n\n", trans);
190 }
191 else
192 {
193 key_count = 0xffff;
194 rc = SR_SemanticResultGetKeyCount(semanticResults[0], &key_count);
195 pfprintf(fout, "parse ok (%d results) (%s) (%d)\n", result_count, trans, key_count);
196 for (i = 0; i < result_count; i++)
197 display_results(semanticResults[i], fout);
198
199 for (i = 0; i < MAX_SEM_RESULTS; i++)
200 {
201 rc = semanticResults[i]->destroy(semanticResults[i]);
202 if (rc != ESR_SUCCESS)
203 return rc;
204 }
205 }
206 return ESR_SUCCESS;
207 }
208
209 /* tests the transcription against the grammar and then decided based on what was expected of the test
210 whether or not is it considered a pass or fail */
ParseTestSet(SR_Grammar * grammar,LCHAR * trans,LCHAR * key,LCHAR * ref,LCHAR * result,PFile * fout)211 ESR_ReturnCode ParseTestSet(SR_Grammar* grammar, LCHAR* trans, LCHAR* key, LCHAR* ref, LCHAR* result, PFile* fout)
212 {
213 size_t len;
214 ESR_ReturnCode rc;
215 int i, result_count;
216 SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
217 LCHAR value[MAX_STR_LENGTH];
218
219 result_count = MAX_SEM_RESULTS;
220 for (i = 0; i < result_count; i++)
221 SR_SemanticResultCreate(&semanticResults[i]);
222
223 lstrtrim(trans);
224 /* check for multiple space separators! */
225 lstr_strip_multiple_spaces(trans);
226
227 pfprintf(fout, "checking (%s) ref(%s) res(%s)\n", trans, ref, result);
228 rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
229 if (rc != ESR_SUCCESS)
230 return rc;
231
232 /*result file will contain
233 transcription | key | reference | result | PASSESD/FAILED */
234
235 if (result_count < 1) /*failed to parse, but this could still be a pass if you expected a failure*/
236 {
237 pfprintf(fout, "NO PARSE FOR: %s|%s|%s| |", trans, key, ref);
238 if (strcmp("FAIL", result) == 0)
239 pfprintf(fout, "PASSED (%s)\n", trans);
240 else
241 pfprintf(fout, "FAILED (%s)\n", trans);
242 }
243 else /*parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
244 {
245 for (i = 0; i < result_count; i++)
246 {
247 len = MAX_STR_LENGTH;
248 if ((rc = semanticResults[i]->getValue(semanticResults[i], key, value, &len)) == ESR_SUCCESS)
249 {
250 pfprintf(fout, "%s|%s|%s|%s|", trans, key, ref, value);
251
252 if (strcmp(value, ref) == 0 && strcmp("PASS", result) == 0)
253 pfprintf(fout, "PASSED\n");
254 else
255 pfprintf(fout, "FAILED\n");
256 }
257 else
258 {
259 pfprintf(fout, "ERROR: %s, while checking key='%s'\n", ESR_rc2str(rc), key);
260 }
261 }
262
263 /*deallocate semantic results*/
264 for (i = 0; i < MAX_SEM_RESULTS; i++)
265 {
266 rc = semanticResults[i]->destroy(semanticResults[i]);
267 if (rc != ESR_SUCCESS)
268 return rc;
269 }
270 }
271 return ESR_SUCCESS;
272 }
273
main(int argc,char ** argv)274 int main(int argc, char **argv)
275 {
276 LCHAR trans[MAX_LINE_LENGTH];
277 SR_Grammar* grammar = NULL;
278 ESR_ReturnCode rc;
279 LCHAR base[P_PATH_MAX] = L("");
280 LCHAR infilename[P_PATH_MAX] = L("");
281 LCHAR inRTfilename[P_PATH_MAX] = L("");
282 LCHAR outfilename[P_PATH_MAX] = L("");
283 PFile *fin = NULL, *fout = NULL;
284 int i;
285 LCHAR *rootrule = L("myRoot"), *p;
286 Opts opts = { 0, 0 };
287
288 /*
289 * Initialize portable library.
290 */
291 CHKLOG(rc, PMemInit());
292
293 fin = PSTDIN;
294 fout = PSTDOUT;
295
296 if (argc < 3)
297 {
298 usage(argv[0]);
299 exit(EXIT_FAILURE);
300 }
301 for (i = 1; i < argc; ++i)
302 {
303 if (!LSTRCMP(argv[i], L("-base")))
304 {
305 ++i;
306 LSTRCPY(base, argv[i]);
307 }
308 else if (!LSTRCMP(argv[i], L("-in")))
309 {
310 ++i;
311 LSTRCPY(infilename, argv[i]);
312 }
313 else if (!LSTRCMP(argv[i], L("-out")))
314 {
315 ++i;
316 LSTRCPY(outfilename, argv[i]);
317 }
318 else if (!LSTRCMP(argv[i], L("-itest")))
319 {
320 ++i;
321 LSTRCPY(inRTfilename, argv[i]);
322 }
323 else if (!LSTRCMP(argv[i], L("-ids")))
324 {
325 opts.use_parse_by_string_ids = 1;
326 }
327 else if (!LSTRCMP(argv[i], L("-allids")))
328 {
329 opts.do_check_all_ids = 1;
330 opts.use_parse_by_string_ids = 1;
331 }
332 else
333 return usage(argv[0]);
334 }
335
336 CHK(rc, PLogInit(NULL, 0));
337
338 rc = SR_GrammarLoad(base, &grammar);
339 if (rc != ESR_SUCCESS)
340 goto CLEANUP;
341
342 if (*outfilename)
343 {
344 if ((fout = pfopen(outfilename, "w")) == NULL)
345 {
346 pfprintf(PSTDOUT, "Could not open file: %s\n", outfilename);
347 rc = 1;
348 goto CLEANUP;
349 }
350 }
351
352 if (opts.do_check_all_ids)
353 {
354 rc = Parse(grammar, NULL, fout, &opts);
355 }
356 else if (*infilename)
357 {
358 if (LSTRCMP(infilename, "-") == 0)
359 {
360 fin = PSTDIN;
361 }
362 else if ((fin = pfopen(infilename, "r")) == NULL)
363 {
364 pfprintf(PSTDOUT, "Could not open file: %s\n", infilename);
365 rc = 1;
366 goto CLEANUP;
367 }
368 for (;;)
369 {
370 if (pfgets(trans, MAX_LINE_LENGTH, fin) == NULL)
371 {
372 if (!pfeof(fin))
373 {
374 rc = ESR_READ_ERROR;
375 PLogError(ESR_rc2str(rc));
376 }
377 break;
378 }
379 if (trans[0] == '#') continue;
380 lstrtrim(trans);
381 /* check for multiple space separators! */
382 lstr_strip_multiple_spaces(trans);
383 pfprintf(fout, "Transcription: %s\n", trans);
384 if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
385 goto CLEANUP;
386 pfprintf(fout, "\n");
387 }
388 }
389 else if (*inRTfilename) /*using a test file*/
390 {
391 if ((fin = pfopen(inRTfilename, "r")) == NULL)
392 {
393 pfprintf(PSTDOUT, "Could not open test file: %s\n", inRTfilename);
394 rc = 1;
395 goto CLEANUP;
396 }
397
398 /*read through the test file parsing it into the variables
399 FORMAT: "the transciption" key "value"
400 */
401 while (ESR_TRUE)
402 {
403 if (0) rc = process_single_key_line(grammar, fin, fout);
404 else rc = process_multi_key_line(grammar, rootrule, fin, fout);
405 if (rc == ESR_READ_ERROR)
406 {
407 rc = ESR_SUCCESS;
408 break;
409 }
410 }
411 }
412 else
413 {
414 /* get some transcriptions from the user */
415 pfprintf(PSTDOUT, "\nSemantic Parser Test Program for esr (Nuance Communicaitions, 2007)\n");
416 pfprintf(PSTDOUT, "'qqq' to quit\n");
417
418 while (ESR_TRUE)
419 {
420 pfprintf(PSTDOUT, "> ");
421
422 if (!fgets(trans, MAX_LINE_LENGTH, PSTDIN))
423 break;
424 // remove trailing whitespace
425 for(p=&trans[0]; *p!=0 && *p!='\n' && *p!='\r'; p++) {}
426 *p=0;
427
428 if (!LSTRCMP("qqq", trans))
429 break;
430 else
431 if ((rc = Parse(grammar, trans, fout, &opts)) != ESR_SUCCESS)
432 goto CLEANUP;
433 }
434 }
435 CLEANUP:
436 if (fin && fin != PSTDIN)
437 pfclose(fin);
438 if (fout && fout != PSTDOUT)
439 pfclose(fout);
440 if (grammar) grammar->destroy(grammar);
441 PLogShutdown();
442 /* PANSIFileSystemDestroy();
443 PFileSystemDestroy();*/
444 PMemShutdown();
445 return rc;
446 }
447
process_single_key_line(SR_Grammar * grammar,PFile * fin,PFile * fout)448 ESR_ReturnCode process_single_key_line(SR_Grammar* grammar, PFile* fin, PFile* fout)
449 {
450 LCHAR* position;
451 LCHAR line[MAX_LINE_LENGTH];
452 LCHAR trans[MAX_LINE_LENGTH];
453 LCHAR key[MAX_LINE_LENGTH];
454 LCHAR refValue[MAX_LINE_LENGTH];
455 LCHAR result[MAX_LINE_LENGTH];
456 ESR_ReturnCode rc;
457
458 position = pfgets(line, MAX_LINE_LENGTH, fin);
459 if (line[0] == '#')
460 return ESR_SUCCESS;
461 if (!strncmp(line, "__END__", 7))
462 return ESR_READ_ERROR;
463 if (position == NULL)
464 {
465 if (pfeof(fin))
466 return ESR_READ_ERROR;
467 else
468 {
469 PLogError(L("ESR_READ_ERROR"));
470 return ESR_READ_ERROR;
471 }
472 }
473
474 //get the transcription to test
475 if ((position = strtok(line, "\"")) != NULL)
476 {
477 LSTRCPY(trans, position);
478 }
479 else
480 {
481 pfprintf(fout, "INVALID FORMAT for input line 1 \n");
482 rc = ESR_INVALID_ARGUMENT;
483 goto CLEANUP;
484 }
485
486 //get the key (meaning)
487 if ((position = strtok(NULL, " \t")) != NULL)
488 {
489 LSTRCPY(key, position);
490 }
491 else
492 {
493 pfprintf(fout, "INVALID FORMAT for input line 2\n");
494 rc = ESR_INVALID_ARGUMENT;
495 goto CLEANUP;
496 }
497
498 //get the expected return string
499 if ((position = strtok(NULL, "\"")) != NULL)
500 {
501 LSTRCPY(refValue, position);
502 }
503 else
504 {
505 pfprintf(fout, "INVALID FORMAT for input line 3\n");
506 rc = ESR_INVALID_ARGUMENT;
507 goto CLEANUP;
508 }
509
510 //get the expected result PASS/FAIL
511 //there is no need to write PASS, if nothing is written PASS is assumed
512 if ((position = strtok(NULL, " \t\r\n\"")) != NULL)
513 {
514 LSTRCPY(result, position);
515
516 if (strcmp(result, "PASS") != 0 && strcmp(result, "FAIL") != 0)
517 {
518 pfprintf(fout, "INVALID FORMAT for input line, use either PASS or FAIL\n");
519 rc = ESR_INVALID_ARGUMENT;
520 goto CLEANUP;
521 }
522
523 if ((rc = ParseTestSet(grammar, trans, key, refValue, result, fout)) != ESR_SUCCESS)
524 goto CLEANUP;
525 }
526 else
527 {
528 if ((rc = ParseTestSet(grammar, trans, key, refValue, "PASS", fout)) != ESR_SUCCESS)
529 goto CLEANUP;
530 }
531 rc = ESR_SUCCESS;
532 CLEANUP:
533 return rc;
534 }
535
process_multi_key_line(SR_Grammar * grammar,const LCHAR * rootrule,PFile * fin,PFile * fout)536 ESR_ReturnCode process_multi_key_line(SR_Grammar* grammar, const LCHAR* rootrule, PFile* fin, PFile* fout)
537 {
538 LCHAR *position, *p;
539 LCHAR line[MAX_LINE_LENGTH];
540 LCHAR trans[MAX_LINE_LENGTH];
541 LCHAR keyvals[MAX_LINE_LENGTH];
542 ESR_ReturnCode rc;
543 SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
544 LCHAR refkey[MAX_LINE_LENGTH];
545 LCHAR refval[MAX_LINE_LENGTH], value[MAX_STR_LENGTH];
546 size_t i, j, len;
547 size_t result_count;
548
549 position = pfgets(line, MAX_LINE_LENGTH, fin);
550 if (line[0] == '#')
551 return ESR_SUCCESS;
552 if (!strncmp(line, "__END__", 7))
553 return ESR_READ_ERROR;
554 if (position == NULL)
555 {
556 if (pfeof(fin))
557 return ESR_READ_ERROR;
558 else
559 {
560 PLogError(L("ESR_READ_ERROR"));
561 return ESR_READ_ERROR;
562 }
563 }
564
565 /* we're trying to parse
566 Hello there : BONJOUR
567 */
568 p = strtok(line, ":");
569 LSTRCPY(trans, p);
570 /* strip trailing spaces */
571 for (len = strlen(trans); len > 0 && trans[len-1] == ' '; len--)
572 trans[len-1] = 0;
573
574 p = strtok(NULL, "\n\r");
575 /* strip leading spaces */
576 while (*p == ' ' || *p == '\t') p++;
577 LSTRCPY(keyvals, p);
578
579 result_count = MAX_SEM_RESULTS;
580 for (i = 0; i < result_count; i++)
581 SR_SemanticResultCreate(&semanticResults[i]);
582
583 /* pfprintf(fout,"checking (%s) ref(%s)\n", trans, keyvals); */
584 rc = grammar->checkParse(grammar, trans, semanticResults, (size_t*) & result_count);
585 if (rc != ESR_SUCCESS)
586 return rc;
587
588 /*result file will contain
589 transcription | key | reference | result | PASSESD/FAILED */
590
591 if (result_count < 1) /*failed to parse, but this could still be a pass if you expected a failure*/
592 {
593 pfprintf(fout, "%s|%s| |", trans, keyvals);
594 if (!strcmp("FAIL", keyvals) || !strcmp(keyvals, "-"))
595 pfprintf(fout, "PASSED\n");
596 else
597 pfprintf(fout, "FAILED\n");
598 }
599 else /*parsed, look at what was expected, what was returned and which of PASS/FAIL is expected */
600 {
601 size_t size, len;
602 LCHAR* keys_available[MAX_KEYS]; /* array of pointers to strings */
603 size = MAX_KEYS;
604 rc = semanticResults[0]->getKeyList(semanticResults[0], (LCHAR**) & keys_available, &size);
605
606 for (p = strtok(keyvals, ";"); p; p = strtok(NULL, ";"))
607 {
608 sprintf(refkey, "%s.%s", rootrule, p);
609 p = strchr(refkey, '=');
610 assert(p);
611 *p = 0;
612 p++;
613 if (*p == '\'') p++;
614 LSTRCPY(refval, p);
615 if (refval[ strlen(refval)-1] == '\'') refval[strlen(refval)-1] = 0;
616
617 for (i = 0; i < result_count; i++)
618 {
619 len = MAX_STR_LENGTH;
620 for (j = 0; j < size; j++)
621 if (!strcmp(keys_available[j], refkey)) break;
622 if (j < size)
623 rc = semanticResults[i]->getValue(semanticResults[i], refkey, value, &len);
624 else
625 {
626 LSTRCPY(value, "<NOSUCHKEY>");
627 rc = ESR_NO_MATCH_ERROR;
628 }
629 pfprintf(fout, "%s|%s|%s|%s|", trans, refkey, refval, value);
630 if (strcmp(value, refval) == 0)
631 pfprintf(fout, "PASSED\n");
632 else
633 pfprintf(fout, "FAILED\n");
634 }
635 }
636
637 /*deallocate semantic results*/
638 for (i = 0; i < MAX_SEM_RESULTS; i++)
639 {
640 rc = semanticResults[i]->destroy(semanticResults[i]);
641 if (rc != ESR_SUCCESS)
642 PLogError("%s while destroying", ESR_rc2str(rc));
643 }
644 }
645 return ESR_SUCCESS;
646 }
647
648