• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  SemanticProcessorImpl.c                                                  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include "SR_SemanticProcessor.h"
21 #include "SR_SemanticProcessorImpl.h"
22 #include "SR_SemanticGraphImpl.h"
23 #include "SR_SemanticResultImpl.h"
24 #include "ESR_ReturnCode.h"
25 #include "plog.h"
26 static const char* MTAG = __FILE__;
27 
28 /**************************************************
29 
30     Internal data structures and functions
31 
32   ************************************************/
33 
34 /**
35  * A partial path holds olables from a start arc, until it reaches a
36  * fork (i.e. multiple next arc possiblities). For each possibility
37  * a new partial path is created and concatenated to this one.
38  */
39 typedef struct sem_partial_path_t
40 {
41   struct sem_partial_path_t* next; /* linked list */
42   arc_token* arc_for_pp;           /* which arc was taken */
43 }
44 sem_partial_path;
45 
/* Set DEBUG_CPF to 1 to have the checkpath_forward* routines print a trace. */
#define DEBUG_CPF 0
#if DEBUG_CPF
static arc_token* debug_base_arc_token = 0;
static int debug_depth = 0;

/*
 * Return a string made of n blanks, used to indent the trace output.
 * Requests longer than the padding literal are capped at its length.
 */
static const char* spaces(int n) {
  const char* pad = "                                                         ";
  int pad_len = strlen(pad);
  int count = (n > pad_len) ? pad_len : n;
  return pad + pad_len - count;
}
#endif
57 
58 /**
59  * A holder for accumulated scripts
60  */
61 typedef struct script_t
62 {
63   const LCHAR* expression;
64   const LCHAR* ruleName;
65 }
66 script;
67 
68 /**
69  * A list of accumulated scripts
70  */
71 typedef struct script_list_t
72 {
73   script list[MAX_SCRIPTS];
74   size_t num_scripts;
75 }
76 script_list;
77 
78 static const LCHAR* WORD_NOT_FOUND = L("word_not_found");
79 
80 /**
81  * Initialize the list of partial paths
82  */
83 static ESR_ReturnCode sem_partial_path_list_init(sem_partial_path* heap, int nheap);
84 static sem_partial_path* sem_partial_path_create(sem_partial_path* heap);
85 static ESR_ReturnCode sem_partial_path_free(sem_partial_path* heap, sem_partial_path* path);
86 static void sem_partial_path_print(sem_partial_path* path, sem_partial_path* paths, int npaths, wordmap* ilabels);
87 
88 /**
89  * Look up the word string given the id
90  */
91 static const LCHAR* lookUpWord(SR_SemanticGraphImpl* semgraph, wordID wdid);
92 
93 /**
94  * Look up the actual script string given the label
95  */
96 static const LCHAR* lookUpScript(SR_SemanticGraphImpl* semgraph, const LCHAR* script_label);
97 
98 /**
99  * Recursively accumulate the scripts
100  */
101 static ESR_ReturnCode accumulate_scripts(SR_SemanticGraphImpl* semgraph, script_list* scripts, sem_partial_path* path_root);
102 
103 static ESR_ReturnCode interpretScripts(SR_SemanticProcessorImpl* semproc, LCHAR* scripts, SR_SemanticResult** result);
104 
105 
SR_SemanticProcessorCreate(SR_SemanticProcessor ** self)106 ESR_ReturnCode SR_SemanticProcessorCreate(SR_SemanticProcessor** self)
107 {
108     SR_SemanticProcessorImpl* impl;
109     ESR_ReturnCode rc;
110 
111     if (self == NULL)
112     {
113         PLogError(L("ESR_INVALID_ARGUMENT"));
114         return ESR_INVALID_ARGUMENT;
115     }
116     impl = NEW(SR_SemanticProcessorImpl, MTAG);
117     if (impl == NULL)
118     {
119         PLogError(L("ESR_OUT_OF_MEMORY"));
120         return ESR_OUT_OF_MEMORY;
121     }
122     if ((rc = LA_Init(&impl->analyzer)) != ESR_SUCCESS)
123         goto CLEANUP;
124     if ((rc = EP_Init(&impl->parser)) != ESR_SUCCESS)
125         goto CLEANUP;
126     if ((rc = ST_Init(&impl->symtable)) != ESR_SUCCESS)
127         goto CLEANUP;
128     if ((rc = EE_Init(&impl->eval)) != ESR_SUCCESS)
129         goto CLEANUP;
130     impl->acc_scripts = MALLOC(sizeof(LCHAR) * MAX_SCRIPT_LEN, NULL);
131     if (impl->acc_scripts == NULL)
132     {
133         rc = ESR_OUT_OF_MEMORY;
134         PLogError(ESR_rc2str(rc));
135         goto CLEANUP;
136     }
137 
138     impl->Interface.destroy = &SR_SemanticProcessor_Destroy;
139     impl->Interface.checkParse = &SR_SemanticProcessor_CheckParse;
140     impl->Interface.checkParseByWordID = &SR_SemanticProcessor_CheckParseByWordID;
141     impl->Interface.setParam = &SR_SemanticProcessor_SetParam;
142     impl->Interface.flush = &SR_SemanticProcessor_Flush;
143 
144 
145     *self = (SR_SemanticProcessor*) impl;
146     return ESR_SUCCESS;
147 CLEANUP:
148     impl->Interface.destroy(&impl->Interface);
149     return rc;
150 }
151 
SR_SemanticProcessor_Destroy(SR_SemanticProcessor * self)152 ESR_ReturnCode SR_SemanticProcessor_Destroy(SR_SemanticProcessor* self)
153 {
154     SR_SemanticProcessorImpl* impl = (SR_SemanticProcessorImpl*) self;
155 
156     if (self == NULL)
157     {
158         PLogError(L("ESR_INVALID_ARGUMENT"));
159         return ESR_INVALID_ARGUMENT;
160     }
161 
162     LA_Free(impl->analyzer);
163     EP_Free(impl->parser);
164     ST_Free(impl->symtable);
165     EE_Free(impl->eval);
166     if (impl->acc_scripts != NULL)
167     {
168         FREE(impl->acc_scripts);
169         impl->acc_scripts = NULL;
170     }
171     FREE(impl);
172 
173     return ESR_SUCCESS;
174 }
175 
176 
append_with_check(LCHAR ** dst,const LCHAR src,const LCHAR * end)177 ESR_ReturnCode append_with_check(LCHAR** dst, const LCHAR src, const LCHAR* end)
178 {
179     if (*dst < end)
180     {
181         **dst = src;
182         ++(*dst);
183         return ESR_SUCCESS;
184     }
185     PLogError(L("ESR_BUFFER_OVERFLOW"));
186     return ESR_BUFFER_OVERFLOW;
187 }
188 
/**
 * Scan at most len characters of text for the first occurrence of c or c2.
 *
 * The scan also stops at the string terminator; in that case a pointer to
 * the terminator is returned (provided len was not exhausted).
 *
 * @param text String to scan
 * @param c    First character to look for
 * @param c2   Second character to look for
 * @param len  Maximum number of characters to examine
 * @return Pointer to where the scan stopped, or NULL if len characters were
 *         examined without stopping (including len == 0)
 */
static const LCHAR* LSTRNCHR2(const LCHAR* text, LCHAR c, LCHAR c2, size_t len)
{
    /* Test the remaining length first so text is never read beyond len. */
    for (; len > 0 && *text != c && *text != c2 && *text; text++, len--)
        ;
    if (len) return text;
    else return NULL;
}
196 
/**
 * Compute the length of the token that starts at expr.
 *
 *  - an operator or a ';' is a one-character token;
 *  - a token starting with a single quote is a literal that extends up to and
 *    including the closing quote (a backslash-escaped quote does not close
 *    it), or to the end of the string if it is never closed;
 *  - anything else extends up to, but not including, the next operator or
 *    ';', or up to and including the next '(', or to the end of the string.
 *
 * @param expr Start of the token
 * @return Number of characters in the token
 */
static size_t get_next_token_len(const char* expr)
{
    const char* cursor = expr;

    if (IS_OPERATOR(expr) || *expr == ';')
        return 1;

    if (*expr == '\'')
    {
        /* a literal */
        while (*cursor != '\0')
        {
            if (*cursor == '\\' && cursor[1] == '\'')
            {
                /* step over the backslash and the quote it escapes */
                cursor += 2;
                continue;
            }
            if (cursor > expr && *cursor == '\'')
            {
                /* closing quote belongs to the token */
                ++cursor;
                break;
            }
            ++cursor;
        }
        return cursor - expr;
    }

    /* identifier, number, function call, ... */
    while (*cursor != '\0')
    {
        if (*cursor == '(')
        {
            /* the opening parenthesis belongs to the token */
            ++cursor;
            break;
        }
        if (IS_OPERATOR(cursor) || *cursor == ';')
            break;
        ++cursor;
    }
    return cursor - expr;
}
244 
/*
 * Word iteration over a prepared transcription. The assumption is that the
 * transcription has been prepared: words are split by a NUL character and
 * the transcription ends with a double NUL.
 *
 * firstWord() yields the current word; nextWord() yields the word after it,
 * or the argument unchanged when it is NULL or already at the end.
 * The arguments are parenthesized so that expressions such as `s + 1` expand
 * correctly. Note that nextWord() evaluates its argument more than once.
 */
#define firstWord(transcription) (transcription)
#define nextWord(transcription)  ((transcription) && *(transcription) ? &(transcription)[LSTRLEN(transcription)+1] : (transcription))
249 
checkpath_forwardByWordID(SR_SemanticGraphImpl * semgraph,sem_partial_path * heap,arc_token * atoken_start,sem_partial_path * pp,const wordID * wordIDs)250 static ESR_ReturnCode checkpath_forwardByWordID(SR_SemanticGraphImpl* semgraph,
251         sem_partial_path* heap,
252         arc_token* atoken_start,
253         sem_partial_path *pp,
254         const wordID* wordIDs)
255 {
256     arc_token* atok_use;
257     sem_partial_path* pp_branch;
258     arc_token* atok;
259     const wordID* currentWord = wordIDs;
260 
261     /*****************
262      * Recursive Part (operate on the next arc or the branch)
263      *****************/
264     for (atok = atoken_start; atok; atok = ARC_TOKEN_PTR(semgraph->arc_token_list, atok->next_token_index))
265     {
266 #if DEBUG_CPF
267         printf("%strying arc %d %p ilabel%d(%s) olabel %d\n", spaces(debug_depth), atok-debug_base_arc_token, atok,
268                atok->ilabel, atok->ilabel!=MAXwordID?semgraph->ilabels->words[atok->ilabel]:"max",   atok->olabel);
269 #endif
270         atok_use = NULL;
271         currentWord = wordIDs;
272 
273         if (atok->ilabel < semgraph->ilabels->num_slots && atok->ilabel != WORD_EPSILON_LABEL &&
274                 wordmap_whether_in_rule(semgraph->ilabels, *currentWord, atok->ilabel))
275         {
276             /* atok->ilabel is the slotid */
277             atok_use = arc_tokens_find_ilabel(semgraph->arc_token_list, semgraph->arcs_for_slot[atok->ilabel], *currentWord);
278             if (!atok_use)
279             {
280                 arc_token* a;
281                 PLogError(L("ESR_INVALID_STATE: finding wdid %d in slot %d"), *currentWord, atok->ilabel);
282                 for (a = semgraph->arcs_for_slot[atok->ilabel]; 0 && a; a = ARC_TOKEN_PTR(semgraph->arc_token_list, a->next_token_index))
283                 {
284                     PLogError(L("a %x ilabel %d olabel %d"), a, a->ilabel, a->olabel);
285                 }
286                 return ESR_INVALID_STATE;
287             }
288             else
289                 ++currentWord;
290         }
291         else if (*currentWord != MAXwordID && atok->ilabel == *currentWord)
292         {
293             ++currentWord;
294             atok_use = atok;
295         }
296         else if (atok->ilabel == WORD_EPSILON_LABEL) /* more eps transitions */
297             atok_use = atok;
298 
299         if (atok_use == NULL)
300             continue;
301         else {
302             arc_token* atokfna = ARC_TOKEN_PTR(semgraph->arc_token_list, atok->first_next_arc);
303             pp_branch = sem_partial_path_create(heap);
304 
305 #if DEBUG_CPF
306             printf("%smatched arc %d %p ilabel%d(%s) olabel %d\n", spaces(debug_depth), atok-debug_base_arc_token, atok,
307                    atok->ilabel, semgraph->ilabels->words[atok->ilabel],   atok->olabel);
308 #endif
309 
310             if (!pp_branch)
311                 return ESR_INVALID_STATE;
312             pp->next = pp_branch;
313             pp->arc_for_pp = atok_use;
314 
315             if (atok->first_next_arc == ARC_TOKEN_NULL && *currentWord == MAXwordID)
316                 return ESR_SUCCESS;
317             else if (atokfna && atokfna->ilabel==MAXwordID && atokfna->olabel==MAXwordID && *currentWord==MAXwordID)
318                 return ESR_SUCCESS;
319             else
320             {
321 #if DEBUG_CPF
322                 sem_partial_path_print(pp_branch, &sem_partial_paths[0], MAX_SEM_PARTIAL_PATHS, semgraph->ilabels);
323                 debug_depth += 2;
324 #endif
325                 ESR_ReturnCode rc = checkpath_forwardByWordID(semgraph, heap, atokfna, pp_branch, currentWord);
326 #if DEBUG_CPF
327                 debug_depth -= 2;
328 #endif
329                 if (rc == ESR_SUCCESS)
330                     return ESR_SUCCESS;
331                 else if (rc == ESR_INVALID_STATE)
332                 {
333                     /* if out-of-memory of other problem, then just abort */
334                     return ESR_INVALID_STATE;
335                 }
336                 else
337                 {
338                     /* need to uncharge through epsilons, until pp->next==pp_branch */
339                     // sem_partial_path* qq = pp->next;
340                     sem_partial_path_free(heap, pp->next);
341                     pp->arc_for_pp = NULL;
342                     // for (qq = pp->next; qq != pp_branch; qq = qq->next)  sem_partial_path_free(qq);
343                     pp->next = NULL;
344                 }
345             }
346         }
347 #if DEBUG_CPF
348         printf("%sdone trying arc %d %p ilabel%d(%s) olabel %d\n", spaces(debug_depth), atok-debug_base_arc_token, atok,
349                atok->ilabel, semgraph->ilabels->words[atok->ilabel],   atok->olabel);
350 #endif
351     } /* end for atok .. */
352     return ESR_NO_MATCH_ERROR;
353 }
354 
355 
checkpath_forward(SR_SemanticGraphImpl * semgraph,sem_partial_path * heap,arc_token * atoken_start,sem_partial_path * pp,const LCHAR * transcription)356 static ESR_ReturnCode checkpath_forward(SR_SemanticGraphImpl* semgraph,
357         sem_partial_path* heap,
358         arc_token* atoken_start,
359         sem_partial_path *pp,
360         const LCHAR* transcription)
361 {
362     arc_token* atok_use;
363     wordID wdID;
364     sem_partial_path* pp_branch;
365     arc_token* atok;
366     const LCHAR* transp;
367 
368     /*****************/
369     /* Recursive Part (operate on the next arc or the branch)*/
370     /*****************/
371     for (atok = atoken_start; atok; atok = ARC_TOKEN_PTR(semgraph->arc_token_list, atok->next_token_index))
372     {
373 #if DEBUG_CPF
374         printf("%strying arc %d %p ilabel%d(%s) olabel %d\n", spaces(debug_depth), atok-debug_base_arc_token, atok,
375                atok->ilabel, atok->ilabel!=MAXwordID?semgraph->ilabels->words[atok->ilabel]:"max",   atok->olabel);
376 #endif
377 
378         atok_use = NULL;
379         transp = transcription;
380         wdID = wordmap_find_index(semgraph->ilabels, firstWord(transp));
381 
382         if (atok->ilabel < semgraph->ilabels->num_slots && atok->ilabel != WORD_EPSILON_LABEL &&
383                 wordmap_whether_in_rule(semgraph->ilabels, wdID, atok->ilabel))
384         {
385             /* atok->ilabel is the slotid */
386             atok_use = arc_tokens_find_ilabel(semgraph->arc_token_list, semgraph->arcs_for_slot[atok->ilabel], wdID);
387             if (!atok_use)
388             {
389                 arc_token* a;
390                 PLogError(L("ESR_INVALID_STATE: finding wdid %d in slot %d"), wdID, atok->ilabel);
391                 for (a = semgraph->arcs_for_slot[atok->ilabel]; 0 && a; a = ARC_TOKEN_PTR(semgraph->arc_token_list, a->next_token_index))
392                 {
393                     PLogError(L("a %x ilabel %d olabel %d"), a, a->ilabel, a->olabel);
394                 }
395                 return ESR_INVALID_STATE;
396             }
397             else {
398                 transp = nextWord(transp);
399                 wdID = wordmap_find_index(semgraph->ilabels, firstWord(transp));
400             }
401         }
402         else if (wdID != MAXwordID && atok->ilabel == wdID)
403         {
404             transp = nextWord(transp);
405             wdID = wordmap_find_index(semgraph->ilabels, firstWord(transp));
406             atok_use = atok;
407         }
408         else if (atok->ilabel == WORD_EPSILON_LABEL) /* more eps transitions */
409             atok_use = atok;
410 
411         if (atok_use == NULL)
412             continue;
413         else {
414             arc_token* atokfna = ARC_TOKEN_PTR(semgraph->arc_token_list, atok->first_next_arc);
415             pp_branch = sem_partial_path_create(heap);
416 
417 #if DEBUG_CPF
418             printf("%smatched arc %d %p ilabel%d(%s) olabel %d\n", spaces(debug_depth), atok-debug_base_arc_token, atok,
419                    atok->ilabel, semgraph->ilabels->words[atok->ilabel],   atok->olabel);
420 #endif
421 
422             if (!pp_branch)
423                 return ESR_INVALID_STATE;
424             pp->next = pp_branch;
425             pp->arc_for_pp = atok_use;
426             if (atok->first_next_arc==ARC_TOKEN_NULL && *transp==0)
427                 return ESR_SUCCESS;
428             else if (atokfna && atokfna->ilabel==MAXwordID && atokfna->olabel==MAXwordID && *transp==0)
429                 return ESR_SUCCESS;
430             else
431             {
432 #if DEBUG_CPF
433                 sem_partial_path_print(pp_branch, &sem_partial_paths[0], MAX_SEM_PARTIAL_PATHS, semgraph->ilabels);
434                 debug_depth += 2;
435 #endif
436                 ESR_ReturnCode rc = checkpath_forward(semgraph, heap, atokfna, pp_branch, transp);
437 #if DEBUG_CPF
438                 debug_depth -= 2;
439 #endif
440                 if (rc == ESR_SUCCESS)
441                     return rc;
442                 else if (rc == ESR_INVALID_STATE)
443                 {
444                     /* if out-of-memory of other problem, then just abort */
445                     return ESR_INVALID_STATE;
446                 }
447                 else
448                 {
449                     /* need to uncharge through epsilons, until pp->next==pp_branch */
450                     // sem_partial_path* qq = pp->next;
451                     sem_partial_path_free(heap, pp->next);
452                     pp->arc_for_pp = NULL;
453                     // for (qq = pp->next; qq != pp_branch; qq = qq->next)  sem_partial_path_free(qq);
454                     pp->next = NULL;
455                 }
456             }
457         }
458 #if DEBUG_CPF
459         printf("%sdone trying arc %d %p ilabel%d(%s) olabel %d\n", spaces(debug_depth), atok-debug_base_arc_token, atok,
460                atok->ilabel, semgraph->ilabels->words[atok->ilabel],   atok->olabel);
461 #endif
462     } /* end for atok .. */
463     return ESR_NO_MATCH_ERROR;
464 }
465 
466 /**
467  * Parse the graph
468  */
SR_SemanticProcessor_CheckParseByWordID(SR_SemanticProcessor * self,SR_SemanticGraph * graph,wordID * wordIDs,SR_SemanticResult ** results,size_t * resultCount)469 ESR_ReturnCode SR_SemanticProcessor_CheckParseByWordID(SR_SemanticProcessor* self,
470                                                        SR_SemanticGraph* graph,
471                                                        wordID* wordIDs,
472                                                        SR_SemanticResult** results,
473                                                        size_t* resultCount)
474 {
475     sem_partial_path *path_root;
476     script_list raw_scripts_buf;
477     LCHAR lhs[MAX_STRING_LEN];
478     LCHAR meaning[MAX_STRING_LEN];      /* special key */
479     LCHAR ruleName[32];
480     size_t i, j, size, resultIdx;
481     LCHAR* dst = NULL;
482     LCHAR* p;
483     size_t tokenLen = 0;
484     const LCHAR* src;
485     HashMap* hashmap = NULL;
486     ESR_ReturnCode rc;
487     ESR_BOOL containsKey;
488     sem_partial_path heap[MAX_SEM_PARTIAL_PATHS];
489     SR_SemanticProcessorImpl* semproc  = (SR_SemanticProcessorImpl*) self;
490     SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) graph;
491 
492     LSTRCPY(ruleName, L(""));
493     CHKLOG(rc, sem_partial_path_list_init(heap, sizeof(heap)/sizeof(heap[0])));
494     path_root = sem_partial_path_create(heap);
495     if (!path_root)
496     {
497         rc = ESR_INVALID_STATE;
498         goto CLEANUP;
499     }
500 
501     /**
502      * Parse the graph
503      */
504     rc = checkpath_forwardByWordID(semgraph, heap, &semgraph->arc_token_list[0], path_root,
505                                    wordIDs);
506     if (rc == ESR_NO_MATCH_ERROR)
507     {
508         *resultCount = 0;
509         return ESR_SUCCESS; /* did not parse */
510     }
511     else if (rc == ESR_SUCCESS)
512     {
513         if (*resultCount > 0)
514             *resultCount = 1;
515         else
516         {
517             /**
518              * If the array to hold the results is not big enough,
519              * then tell the user right away by returning ESR_BUFFER_OVERFLOW
520        with the size required returned in resultCount */
521             rc = ESR_BUFFER_OVERFLOW;
522             PLogError(ESR_rc2str(rc));
523             goto CLEANUP;
524         }
525     }
526     else if (rc == ESR_INVALID_STATE)
527         goto CLEANUP;
528 
529 #if DEBUG_CPF
530     sem_partial_path_print(path_root, &sem_partial_paths[0], MAX_SEM_PARTIAL_PATHS,semgraph->ilabels);
531 #endif
532 
533     /* create the array of Semantic Result Pointers */
534     for (resultIdx = 0; resultIdx < *resultCount; resultIdx++)
535     {
536         raw_scripts_buf.num_scripts = 0;
537         for (i = 0; i < MAX_SCRIPTS; i++)
538         {
539             raw_scripts_buf.list[i].expression = 0;
540             raw_scripts_buf.list[i].ruleName = 0;
541         }
542 
543         /*
544          * Go through the partial paths which were successful and accumulate the scripts
545          * that you encountered (BUGGY)
546          */
547         CHKLOG(rc, accumulate_scripts(semgraph, &raw_scripts_buf, path_root));
548         CHKLOG(rc, sem_partial_path_free(heap, path_root));
549 
550         /*pfprintf(PSTDOUT,"Accumulated scripts\n");*/
551 
552         /*
553          * Prepare the scripts for processing, in other words, make them "nice".
554          * What I mean by making them nice is to do stuff like:
555          *
556          * if ruleName is:   root}
557          *    expression is: meaning='hello';meaning=meaning+' '+'world';
558          *
559          * what I want to accumulate is
560          *    root.meaning='hello';root.meaning=root.meaning+' '+'world';
561          *
562          * I am basically replacing END_SCOPE_MARKER with '.'  and inserting 'root.'
563          * before every lhs identifier.
564          *
565          */
566         for (dst = &semproc->acc_scripts[0], semproc->acc_scripts[0] = '\0', i = 0; i < raw_scripts_buf.num_scripts; ++i)
567         {
568             if (raw_scripts_buf.list[i].ruleName && raw_scripts_buf.list[i].expression &&
569                     raw_scripts_buf.list[i].ruleName != WORD_NOT_FOUND &&
570                     raw_scripts_buf.list[i].expression != WORD_NOT_FOUND)
571             {
572                 if (!LSTRCMP(raw_scripts_buf.list[i].expression, L(";")))
573                     continue;
574                 /* set the rule name in a temporary buffer and in the dst */
575                 src = raw_scripts_buf.list[i].ruleName;
576                 p = ruleName;
577                 while (*src && *src != END_SCOPE_MARKER) /* trim off the trailing closing brace END_SCOPE_MARKER */
578                 {
579                     CHKLOG(rc, append_with_check(&dst, *src, &semproc->acc_scripts[MAX_SCRIPT_LEN-1]));
580                     CHKLOG(rc, append_with_check(&p, *src, &ruleName[31]));
581                     ++src;
582                 }
583 
584 
585                 /* put a dot after the rule name, and before the lhs */
586                 CHKLOG(rc, append_with_check(&dst, L('.'), &semproc->acc_scripts[MAX_SCRIPT_LEN-1]));
587                 CHKLOG(rc, append_with_check(&p, L('.'), &ruleName[31]));
588 
589                 /* terminate the ruleName string */
590                 CHKLOG(rc, append_with_check(&p, 0, &ruleName[31]));
591 
592                 /* append the rest of the expression */
593                 src = raw_scripts_buf.list[i].expression;
594 
595                 while (ESR_TRUE)
596                 {
597                     /* get the LHS identifier, append to dst, and store temporarily
598             in lhs buffer*/
599                     p = lhs;
600                     while (*src && *src != '=')
601                     {
602                         CHKLOG(rc, append_with_check(&dst, *src, &semproc->acc_scripts[MAX_SCRIPT_LEN-1]));
603                         CHKLOG(rc, append_with_check(&p, *src, &lhs[MAX_STRING_LEN-1]));
604                         ++src;
605                     }
606                     /* terminate the lhs string */
607                     CHKLOG(rc, append_with_check(&p, 0, &lhs[MAX_STRING_LEN-1]));
608 
609                     /* prepend every occurrence of the LHS identifier with 'ruleName.'*/
610                     for (; *src && *src != ';'; src += tokenLen)
611                     {
612                         const LCHAR* p2;
613 
614                         tokenLen = get_next_token_len(src);
615                         if (IS_LOCAL_IDENTIFIER(src, tokenLen)  /* || !LSTRCMP(token, lhs) */)
616                         {
617                             /* use p to copy stuff now */
618                             p = ruleName;
619                             while (*p)
620                             {
621                                 /* prepend the rule name to the identifier */
622                                 CHKLOG(rc, append_with_check(&dst, *p, &semproc->acc_scripts[MAX_SCRIPT_LEN-1]));
623                                 ++p;
624                             }
625                         }
626                         for (p2 = src; p2 < src + tokenLen; ++p2)
627                             CHKLOG(rc, append_with_check(&dst, *p2, &semproc->acc_scripts[MAX_SCRIPT_LEN-1]));
628 
629                     }
630 
631                     /*
632                      * In an expression there may be several statements, each perhaps with a
633                      * new LHS identifier
634                      */
635 
636                     /* skip extra semicolons */
637                     while (*src == ';')
638                         ++src;
639                     /* skip whitespace */
640                     while (isspace(*src))
641                         ++src;
642 
643                     if (!*src)
644                     {
645                         /* if end of the expression */
646                         /* terminate the eScript expression properly */
647                         CHKLOG(rc, append_with_check(&dst, L(';'), &semproc->acc_scripts[MAX_SCRIPT_LEN-1]));
648                         *dst = '\0';/* terminate the string, DO NOT DO ++ !!! possibility of next loop iteration
649                                        which will concatenate to the dst string */
650                         break;
651                     }
652                     else
653                     {
654                         /* concat a single semi-colon */
655                         CHKLOG(rc, append_with_check(&dst, L(';'), &semproc->acc_scripts[MAX_SCRIPT_LEN-1]));
656                         p = ruleName;
657                         while (*p)
658                         {
659                             /* prepend the rule name for the new statement */
660                             CHKLOG(rc, append_with_check(&dst, *p, &semproc->acc_scripts[MAX_SCRIPT_LEN-1]));
661                             ++p;
662                         }
663                     }
664                 }
665             }
666         }
667         if (0) PLogMessage( L("Accumulated Scripts for:\n%s"), semproc->acc_scripts);
668         if (&results[resultIdx] != NULL) /* SemanticResultImpl assumed to have been created externally */
669             interpretScripts(semproc, semproc->acc_scripts, &results[resultIdx]);
670 
671         /**
672          * Fill in the 'meaning', if it is not there
673          *  map 'ROOT.meaning' to 'meaning'
674          *
675          * NOTE: I am reusing some vars even though the names are a little bit inappropriate.
676          */
677         hashmap = ((SR_SemanticResultImpl*)results[resultIdx])->results;
678 
679         LSTRCPY(meaning, L("meaning"));
680         CHKLOG(rc, hashmap->containsKey(hashmap, meaning, &containsKey));
681         if (!containsKey)
682         {
683             LSTRCPY(meaning, ruleName); /* the last rule name encountered is always the root */
684             LSTRCAT(meaning, L("meaning"));
685             CHKLOG(rc, hashmap->containsKey(hashmap, meaning, &containsKey));
686 
687             if (containsKey)
688             {
689                 CHKLOG(rc, hashmap->get(hashmap, meaning, (void **)&p));
690                 /* create a new memory location to hold the meaning... not the same as the other cause
691          I do not want memory destroy problems */
692                 /* add one more space */
693                 dst = MALLOC(sizeof(LCHAR) * (LSTRLEN(p) + 1), L("semproc.meaning"));
694                 if (dst == NULL)
695                 {
696                     rc = ESR_OUT_OF_MEMORY;
697                     PLogError(ESR_rc2str(rc));
698                     goto CLEANUP;
699                 }
700                 LSTRCPY(dst, p);
701                 rc = hashmap->put(hashmap, L("meaning"), dst);
702                 if (rc != ESR_SUCCESS)
703                 {
704                     FREE(dst);
705                     PLogError(ESR_rc2str(rc));
706                     goto CLEANUP;
707                 }
708                 dst = NULL;
709             }
710             else
711             {
712                 /*
713                  * No meaning was provided, so just concat all the values that are associated with the ROOT rule
714                  * (key name begins with ROOT)
715                  */
716                 meaning[0] = 0;
717                 CHKLOG(rc, hashmap->getSize(hashmap, &size));
718                 for (j = 0; j < size; j++)
719                 {
720                     CHKLOG(rc, hashmap->getKeyAtIndex(hashmap, j, &p));
721                     if (LSTRSTR(p, ruleName) == p) /* key name begins with root ruleName */
722                     {
723                         CHKLOG(rc, hashmap->get(hashmap, p, (void **)&dst));
724                         if (meaning[0] != 0) /* separate vals with space */
725                         {
726                             if (LSTRLEN(meaning) + 1 < MAX_STRING_LEN)
727                                 LSTRCAT(meaning, L(" "));
728                             /* chopping the meaning is harmless */
729                         }
730                         if (LSTRLEN(meaning) + LSTRLEN(dst) < MAX_STRING_LEN)
731                         {
732                             /* strcat a max of 32 chars */
733                             LCHAR* p, *pp;
734                             for (pp = &meaning[0]; *pp != 0; pp++) ; /* scan to the end */
735                             for (p = dst; *p != 0 && p - dst < 32;) *pp++ = *p++; /* catenate up to 32 chars */
736                             *pp++ = 0; /* null terminate */
737                             /* LSTRCAT(meaning,dst); */
738                         }
739                         /* chopping the meaning is harmless */
740                     }
741                 }
742                 if (meaning[0] != 0)
743                 {
744                     dst = MALLOC(sizeof(LCHAR) * (LSTRLEN(meaning) + 1), L("semproc.meaning"));
745                     if (dst == NULL)
746                     {
747                         rc = ESR_OUT_OF_MEMORY;
748                         PLogError(ESR_rc2str(rc));
749                         goto CLEANUP;
750                     }
751                     LSTRCPY(dst, meaning);
752                     rc = hashmap->put(hashmap, L("meaning"), dst);
753                     if (rc != ESR_SUCCESS)
754                     {
755                         FREE(dst);
756                         PLogError(ESR_rc2str(rc));
757                         goto CLEANUP;
758                     }
759                     dst = NULL;
760                 }
761             }
762         }
763     }
764 
765     return ESR_SUCCESS;
766 CLEANUP:
767     return rc;
768 }
769 
770 /**
771  * Parse the graph
772  */
SR_SemanticProcessor_CheckParse(SR_SemanticProcessor * self,SR_SemanticGraph * graph,const LCHAR * transcription,SR_SemanticResult ** results,size_t * resultCount)773 ESR_ReturnCode SR_SemanticProcessor_CheckParse(SR_SemanticProcessor* self,
774                                                SR_SemanticGraph* graph,
775                                                const LCHAR* transcription,
776                                                SR_SemanticResult** results,
777                                                size_t* resultCount)
778 {
779     sem_partial_path *path_root;
780     script_list raw_scripts_buf;
781     LCHAR acc_scripts[MAX_SCRIPT_LEN];  /* the accumulated scripts */
782     LCHAR lhs[MAX_STRING_LEN];
783     LCHAR meaning[MAX_STRING_LEN];      /* special key */
784     LCHAR ruleName[MAX_STRING_LEN];
785     LCHAR prepared_transcription[MAX_STRING_LEN+1]; /*for final double null */
786     size_t i, j, size, resultIdx;
787     LCHAR* dst = NULL;
788     LCHAR* p = NULL;
789     size_t tokenLen = 0;
790     const LCHAR* src;
791     HashMap* hashmap = NULL;
792     ESR_ReturnCode rc;
793     ESR_BOOL containsKey;
794     sem_partial_path heap[MAX_SEM_PARTIAL_PATHS];
795     SR_SemanticProcessorImpl* semproc  = (SR_SemanticProcessorImpl*) self;
796     SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) graph;
797 
798     LSTRCPY(ruleName, L(""));
799     CHKLOG(rc, sem_partial_path_list_init(heap, sizeof(heap)/sizeof(heap[0])));
800     path_root = sem_partial_path_create(heap);
801     if (!path_root)
802     {
803         rc = ESR_INVALID_STATE;
804         goto CLEANUP;
805     }
806 
807     /**
808      * prepare the transcription for processing
809      * split words by inserting NULL
810      * term by inserting double NULL at end
811      */
812     for (i = 0; transcription[i] && i < MAX_STRING_LEN - 2; i++)
813     {
814         if (transcription[i] == L(' '))
815             prepared_transcription[i] = 0;
816         else
817             prepared_transcription[i] = transcription[i];
818     }
819     prepared_transcription[i] = prepared_transcription[i+1] = 0; /* double null */
820 
821     /**
822      * Parse the graph
823      */
824 #if DEBUG_CPF
825     debug_base_arc_token = &semgraph->arc_token_list[0];
826     debug_depth = 0;
827 #endif
828     rc = checkpath_forward(semgraph, heap, &semgraph->arc_token_list[0], path_root, prepared_transcription);
829     if (rc == ESR_NO_MATCH_ERROR)
830     {
831         *resultCount = 0;
832         return ESR_SUCCESS; /* did not parse */
833     }
834     else if (rc == ESR_SUCCESS)
835     {
836         if (*resultCount > 0)
837             *resultCount = 1;
838         else
839         {
840             /**
841              * If the array to hold the results is not big enough,
842              * then tell the user right away by returning ESR_BUFFER_OVERFLOW
843        with the size required returned in resultCount */
844             rc = ESR_BUFFER_OVERFLOW;
845             PLogError(ESR_rc2str(rc));
846             goto CLEANUP;
847         }
848     }
849     else if (rc == ESR_INVALID_STATE)
850         goto CLEANUP;
851 
852     /* create the array of Semantic Result Pointers */
853     for (resultIdx = 0; resultIdx < *resultCount; resultIdx++)
854     {
855         raw_scripts_buf.num_scripts = 0;
856         for (i = 0; i < MAX_SCRIPTS; i++)
857         {
858             raw_scripts_buf.list[i].expression = 0;
859             raw_scripts_buf.list[i].ruleName = 0;
860         }
861 
862         /*
863          * Go through the partial paths which were successful and accumulate the scripts
864          * that you encountered (BUGGY)
865          */
866         CHKLOG(rc, accumulate_scripts(semgraph, &raw_scripts_buf, path_root));
867         CHKLOG(rc, sem_partial_path_free(heap, path_root));
868 
869         /*pfprintf(PSTDOUT,"Accumulated scripts\n");*/
870 
871         /*
872          * Prepare the scripts for processing, in other words, make them "nice".
873          * What I mean by making them nice is to do stuff like:
874          *
875          * if ruleName is:   root}
876          *    expression is: meaning='hello';meaning=meaning+' '+'world';
877          *
878          * what I want to accumulate is
879          *    root.meaning='hello';root.meaning=root.meaning+' '+'world';
880          *
881          * I am basically replacing END_SCOPE_MARKER with '.'  and inserting 'root.'
882          * before every lhs identifier.
883          *
884          */
885         for (dst = &acc_scripts[0], acc_scripts[0] = '\0', i = 0; i < raw_scripts_buf.num_scripts; ++i)
886         {
887             if (raw_scripts_buf.list[i].ruleName && raw_scripts_buf.list[i].expression &&
888                     raw_scripts_buf.list[i].ruleName != WORD_NOT_FOUND &&
889                     raw_scripts_buf.list[i].expression != WORD_NOT_FOUND)
890             {
891                 if (!LSTRCMP(raw_scripts_buf.list[i].expression, L(";")))
892                     continue;
893                 /* set the rule name in a temporary buffer and in the dst */
894                 src = raw_scripts_buf.list[i].ruleName;
895                 p = ruleName;
896                 /* trim off the trailing closing brace END_SCOPE_MARKER */
897                 while (*src && *src != END_SCOPE_MARKER)
898                 {
899                     CHKLOG(rc, append_with_check(&dst, *src, &acc_scripts[MAX_SCRIPT_LEN-1]));
900                     CHKLOG(rc, append_with_check(&p, *src, &ruleName[MAX_STRING_LEN-1]));
901                     ++src;
902                 }
903 
904 
905                 /* put a dot after the rule name, and before the lhs */
906                 CHKLOG(rc, append_with_check(&dst, L('.'), &acc_scripts[MAX_SCRIPT_LEN-1]));
907                 CHKLOG(rc, append_with_check(&p, L('.'), &ruleName[MAX_STRING_LEN-1]));
908 
909                 /* terminate the ruleName string */
910                 CHKLOG(rc, append_with_check(&p, 0, &ruleName[MAX_STRING_LEN-1]));
911 
912                 /* append the rest of the expression */
913                 src = raw_scripts_buf.list[i].expression;
914 
915                 while (ESR_TRUE)
916                 {
917                     /* get the LHS identifier, append to dst, and store temporarily in lhs buffer*/
918                     p = lhs;
919                     while (*src && *src != '=')
920                     {
921                         CHKLOG(rc, append_with_check(&dst, *src, &acc_scripts[MAX_SCRIPT_LEN-1]));
922                         CHKLOG(rc, append_with_check(&p, *src, &lhs[MAX_STRING_LEN-1]));
923                         ++src;
924                     }
925                     /* terminate the lhs string */
926                     CHKLOG(rc, append_with_check(&p, 0, &lhs[MAX_STRING_LEN-1]));
927 
928                     /* prepend every occurrence of the LHS identifier with 'ruleName.'*/
929                     for (; *src && *src != ';'; src += tokenLen)
930                     {
931                         const LCHAR* p2;
932 
933                         tokenLen = get_next_token_len(src);
934                         if (IS_LOCAL_IDENTIFIER(src, tokenLen)  /* || !LSTRCMP(token, lhs) */)
935                         {
936                             /* use p to copy stuff now */
937                             p = ruleName;
938                             while (*p)
939                             {
940                                 /* prepend the rule name to the identifier */
941                                 CHKLOG(rc, append_with_check(&dst, *p, &acc_scripts[MAX_SCRIPT_LEN-1]));
942                                 ++p;
943                             }
944                         }
945                         for (p2 = src; p2 < src + tokenLen; ++p2)
946                             CHKLOG(rc, append_with_check(&dst, *p2, &acc_scripts[MAX_SCRIPT_LEN-1]));
947                     }
948 
949                     /*
950                      * In an expression there may be several statements, each perhaps with a
951                      * new LHS identifier
952                      */
953 
954                     while (*src == ';')
955                         ++src; /* skip the double triple... semi-colons*/
956 
957                     if (!*src)
958                     {
959                         /* if end of the expression */
960                         /* terminate the eScript expression properly */
961                         CHKLOG(rc, append_with_check(&dst, L(';'), &acc_scripts[MAX_SCRIPT_LEN-1]));
962                         *dst = '\0';/* terminate the string, DO NOT DO ++ !!! possibility of next loop iteration
963                                        which will concatenate to the dst string */
964                         break;
965                     }
966                     else
967                     {
968                         /* concat a single semi-colon */
969                         CHKLOG(rc, append_with_check(&dst, L(';'), &acc_scripts[MAX_SCRIPT_LEN-1]));
970                         p = ruleName;
971                         while (*p)
972                         {
973                             /* prepend the rule name for the new statement */
974                             CHKLOG(rc, append_with_check(&dst, *p, &acc_scripts[MAX_SCRIPT_LEN-1]));
975                             ++p;
976                         }
977                     }
978                 }
979             }
980         }
981 #if defined( SREC_ENGINE_VERBOSE_LOGGING)
982         PLogMessage(L("Accumulated Scripts for (%s):\n%s"), transcription, acc_scripts);
983 #endif
984         if (&results[resultIdx] != NULL) /* SemanticResultImpl assumed to have been created externally */
985             interpretScripts(semproc, acc_scripts, &results[resultIdx]);
986 
987         /**
988          * Fill in the 'meaning', if it is not there
989          *  map 'ROOT.meaning' to 'meaning'
990          *
991          * NOTE: I am reusing some vars even though the names are a little bit inappropriate.
992          */
993         hashmap = ((SR_SemanticResultImpl*)results[resultIdx])->results;
994 
995         LSTRCPY(meaning, L("meaning"));
996         CHKLOG(rc, hashmap->containsKey(hashmap, meaning, &containsKey));
997         if (!containsKey)
998         {
999             LSTRCPY(meaning, ruleName); /* the last rule name encountered is always the root */
1000             LSTRCAT(meaning, L("meaning"));
1001             CHKLOG(rc, hashmap->containsKey(hashmap, meaning, &containsKey));
1002 
1003             if (containsKey)
1004             {
1005                 CHKLOG(rc, hashmap->get(hashmap, meaning, (void **)&p));
1006                 /* create a new memory location to hold the meaning... not the same as the other cause
1007          I do not want memory destroy problems */
1008                 /* add one more space */
1009                 dst = MALLOC(sizeof(LCHAR) * (LSTRLEN(p) + 1), L("semproc.meaning"));
1010                 if (dst == NULL)
1011                 {
1012                     rc = ESR_OUT_OF_MEMORY;
1013                     PLogError(ESR_rc2str(rc));
1014                     goto CLEANUP;
1015                 }
1016                 LSTRCPY(dst, p);
1017                 CHKLOG(rc, hashmap->put(hashmap, L("meaning"), dst));
1018                 dst = NULL;
1019             }
1020             else
1021                 /* absolutely no meaning was provided, so just concat all the values that are associated
1022                  * with the ROOT rule (key name begins with ROOT) */
1023             {
1024                 meaning[0] = 0;
1025                 CHKLOG(rc, hashmap->getSize(hashmap, &size));
1026                 for (j = 0; j < size; j++)
1027                 {
1028                     CHKLOG(rc, hashmap->getKeyAtIndex(hashmap, j, &p));
1029                     if (LSTRSTR(p, ruleName) == p) /* key name begins with root ruleName */
1030                     {
1031                         CHKLOG(rc, hashmap->get(hashmap, p, (void **)&dst));
1032                         if (meaning[0] != 0) /* separate vals with space */
1033                             LSTRCAT(meaning, L(" "));
1034                         LSTRCAT(meaning, dst);
1035                     }
1036                 }
1037                 if (meaning[0] != 0)
1038                 {
1039                     dst = MALLOC(sizeof(LCHAR) * (LSTRLEN(meaning) + 1), L("semproc.meaning"));
1040                     if (dst == NULL)
1041                     {
1042                         rc = ESR_OUT_OF_MEMORY;
1043                         PLogError(ESR_rc2str(rc));
1044                         goto CLEANUP;
1045                     }
1046                     LSTRCPY(dst, meaning);
1047                     CHKLOG(rc, hashmap->put(hashmap, L("meaning"), dst));
1048                     dst = NULL;
1049                 }
1050             }
1051         }
1052     }
1053 
1054     return ESR_SUCCESS;
1055 CLEANUP:
1056     if (dst != NULL) FREE(dst);
1057     return rc;
1058 }
1059 
1060 /**
1061  * After parsing, interpret the acumulated scripts
1062  */
interpretScripts(SR_SemanticProcessorImpl * semproc,LCHAR * scripts,SR_SemanticResult ** result)1063 static ESR_ReturnCode interpretScripts(SR_SemanticProcessorImpl* semproc,
1064         LCHAR* scripts, SR_SemanticResult** result)
1065 {
1066     ESR_ReturnCode rc;
1067     SR_SemanticResultImpl** impl = (SR_SemanticResultImpl**) result;
1068 
1069     if ((rc = LA_Analyze(semproc->analyzer, scripts)) == ESR_SUCCESS)
1070     {
1071         /****************************
1072          * If all goes well, then the result
1073          * will be written to the HashMap provided
1074          ****************************/
1075         if ((rc = EP_parse(semproc->parser, semproc->analyzer, semproc->symtable, semproc->eval, &((*impl)->results))) != ESR_SUCCESS)
1076             pfprintf(PSTDOUT, "Semantic Result: Error (%s) could not interpret\n", ESR_rc2str(rc));
1077     }
1078     return rc;
1079 }
1080 
1081 
1082 
1083 
1084 
1085 /***************************************************************/
1086 /* PartialPath stuff                                           */
1087 /***************************************************************/
1088 
sem_partial_path_list_init(sem_partial_path * heap,int nheap)1089 static ESR_ReturnCode sem_partial_path_list_init(sem_partial_path* heap, int nheap)
1090 {
1091     int i;
1092     for (i = 0; i < MAX_SEM_PARTIAL_PATHS - 1; i++)
1093         heap[i].next = &heap[i+1];
1094     heap[i].next = 0;
1095     return ESR_SUCCESS;
1096 }
1097 
/**
 * Take one partial path off the free list headed by heap.
 *
 * @param heap Head of the free list; the node is taken from heap->next
 * @return A node with next and arc_for_pp cleared, or NULL when the free
 *         list is empty
 */
static sem_partial_path* sem_partial_path_create(sem_partial_path* heap)
{
    sem_partial_path* node = heap->next;

    if (node != NULL)
    {
        /* unlink the node and hand it out in a clean state */
        heap->next = node->next;
        node->next = NULL;
        node->arc_for_pp = NULL;
        return node;
    }

    /* PLogError() is dangerous here, because the stack is very deep */
    pfprintf(PSTDERR, "sem_partial_path_create() no more partial paths available (limit=%d)\n", MAX_SEM_PARTIAL_PATHS);
    return NULL;
}
1114 
#if DEBUG_CPF
/**
 * Debug helper: print a partial path followed by the chain of paths that
 * lead to it.
 *
 * For each path the arc index (relative to debug_base_arc_token) and, when an
 * arc is attached, its input label, input word and output label are printed.
 * The predecessor is the first entry in paths whose next pointer refers to
 * the current path; the chain ends when no such entry exists.
 *
 * @param path    Path to start from (may be NULL)
 * @param paths   Pool of partial paths searched for predecessors
 * @param npaths  Number of entries in paths
 * @param ilabels Word map used to print input labels
 */
static void sem_partial_path_print(sem_partial_path* path,
        sem_partial_path* paths, int npaths, wordmap* ilabels)
{
    int i;
    sem_partial_path* frompath;
    arc_token* a;

    while (path != NULL)
    {
        /* %p requires void*, and a pointer difference is not an int */
        printf("path %p arc %ld %p ", (void*) path,
               (long)(path->arc_for_pp - debug_base_arc_token),
               (void*) path->arc_for_pp);
        if ((a = path->arc_for_pp) != NULL)
        {
            printf(" ilabel %d(%s) olabel %d\n",
                   a->ilabel, ilabels->words[a->ilabel],
                   a->olabel);
        }
        else
        {
            printf("\n");
        }
        printf(" from ");

        /* find the entry that points at the current path */
        frompath = NULL;
        for (i = 0; i < npaths; i++)
        {
            if (paths[i].next == path)
            {
                frompath = &paths[i];
                break;
            }
        }
        path = frompath;
    }
    printf("--- END ---\n");
}
#endif
1152 
sem_partial_path_free(sem_partial_path * heap,sem_partial_path * path)1153 static ESR_ReturnCode sem_partial_path_free(sem_partial_path* heap, sem_partial_path* path)
1154 {
1155     path->next = heap->next;
1156     heap->next = path;
1157     return ESR_SUCCESS;
1158 }
1159 
1160 /***********************************************************************/
1161 
1162 
lookUpWord(SR_SemanticGraphImpl * semgraph,wordID wdid)1163 static const LCHAR* lookUpWord(SR_SemanticGraphImpl* semgraph, wordID wdid)
1164 {
1165     int wdID = wdid;
1166     int mid_offset, upper_offset;
1167     wordmap* mid_words;
1168     wordmap* upper_words;
1169 
1170     if (wdID < 0 || wdID >= MAXwordID)
1171         return WORD_NOT_FOUND;
1172 
1173     if (semgraph->scopes_olabel_offset < semgraph->script_olabel_offset)
1174     {
1175         mid_offset   = semgraph->scopes_olabel_offset;
1176         mid_words    = semgraph->scopes_olabels;
1177         upper_offset = semgraph->script_olabel_offset;
1178         upper_words  = semgraph->scripts;
1179     }
1180     else
1181     {
1182         mid_offset   = semgraph->script_olabel_offset;
1183         mid_words    = semgraph->scripts;
1184         upper_offset = semgraph->scopes_olabel_offset;
1185         upper_words  = semgraph->scopes_olabels;
1186     }
1187 
1188     if (wdID < mid_offset && wdID < semgraph->ilabels->num_words)
1189     {
1190         return semgraph->ilabels->words[wdID];
1191     }
1192     else if (wdID >= mid_offset && wdID < upper_offset)
1193     {
1194         wdID -= mid_offset;
1195         if (wdID >= 0 && wdID < mid_words->num_words)
1196             return mid_words->words[wdID];
1197     }
1198     else if (wdID >= upper_offset && wdID < MAXwordID)
1199     {
1200         wdID -= upper_offset;
1201         if (wdID >= 0 && wdID < upper_words->num_words)
1202             return upper_words->words[wdID];
1203     }
1204 
1205     return WORD_NOT_FOUND;
1206 }
1207 
/**
 * Look up a script by its label.
 *
 * The label is expected to be a single prefix character followed by the
 * decimal index of the script in semgraph->scripts.
 *
 * @param semgraph     Graph whose scripts word map is consulted
 * @param script_label Label such as "_12"
 * @return The script text, or WORD_NOT_FOUND when the label is NULL or empty
 *         or the index is outside the scripts word map
 */
static const LCHAR* lookUpScript(SR_SemanticGraphImpl* semgraph, const LCHAR* script_label)
{
    int parsed;
    size_t index;

    /* an empty label has no character after the prefix to read */
    if (script_label == NULL || script_label[0] == 0)
        return WORD_NOT_FOUND;

    parsed = atoi(&script_label[1]); /* skip the prepended '_' */
    if (parsed < 0)
        return WORD_NOT_FOUND;
    index = (size_t) parsed;

    /* valid indices are 0 .. num_words - 1 */
    if (index >= semgraph->scripts->num_words)
        return WORD_NOT_FOUND;

    return semgraph->scripts->words[index];
}
1219 
isnum(const LCHAR * str)1220 PINLINE ESR_BOOL isnum(const LCHAR* str)
1221 {
1222     if (!str || !*str)
1223         return ESR_FALSE;
1224 
1225     while (*str)
1226     {
1227         if (!isdigit(*str))
1228             return ESR_FALSE;
1229         str++;
1230     }
1231     return ESR_TRUE;
1232 }
1233 
1234 
accumulate_scripts(SR_SemanticGraphImpl * semgraph,script_list * scripts,sem_partial_path * path)1235 static ESR_ReturnCode accumulate_scripts(SR_SemanticGraphImpl* semgraph,
1236         script_list* scripts, sem_partial_path* path)
1237 {
1238     size_t scope = 0;
1239     arc_token* atok;
1240     sem_partial_path* p;
1241     const LCHAR* word;
1242     size_t j;
1243     ESR_ReturnCode rc;
1244 
1245     for (p = path; p != NULL; p = p->next)
1246     {
1247         atok = p->arc_for_pp;
1248         if (atok == NULL)
1249             continue;
1250         else if (atok->ilabel == WORD_EPSILON_LABEL && atok->olabel == WORD_EPSILON_LABEL)
1251             continue;
1252         else if (atok->olabel != WORD_EPSILON_LABEL)
1253         {
1254             LCHAR* _tMp;
1255             word = lookUpWord(semgraph, atok->olabel);
1256 
1257             if ( IS_BEGIN_SCOPE(word))
1258                 ++scope;
1259             else if ( IS_END_SCOPE(word) )
1260             {
1261                 j = scripts->num_scripts;
1262                 do
1263                 {
1264                     if (scripts->list[j].ruleName == (LCHAR*) scope) /* just an ID */
1265                         scripts->list[j].ruleName = word;
1266                     --j;
1267                 }
1268                 while (j != (size_t) - 1);
1269                 if (scope > 0)
1270                     --scope;
1271                 else
1272                 {
1273                     rc = ESR_INVALID_STATE;
1274                     PLogError(L("ESR_INVALID_STATE: Tried popping scope when it was zero"));
1275                     goto CLEANUP;
1276                 }
1277             }
1278             else
1279             {
1280                 /* make sure it is actually a script */
1281                 if (wordmap_find_index(semgraph->scripts, word) != MAXwordID)
1282                 {
1283                     MEMCHK(rc, scripts->num_scripts, MAX_SCRIPTS);
1284                     scripts->list[scripts->num_scripts].expression = word;
1285                     scripts->list[scripts->num_scripts].ruleName = (LCHAR*) scope; /* just an ID */
1286                     ++scripts->num_scripts;
1287                 }
1288                 /* else ignore */
1289             }
1290         }
1291     }
1292     return ESR_SUCCESS;
1293 CLEANUP:
1294     return rc;
1295 }
1296 
SR_SemanticProcessor_SetParam(SR_SemanticProcessor * self,const LCHAR * key,const LCHAR * value)1297 ESR_ReturnCode SR_SemanticProcessor_SetParam(SR_SemanticProcessor* self,
1298         const LCHAR* key, const LCHAR* value)
1299 {
1300     SR_SemanticProcessorImpl* impl = (SR_SemanticProcessorImpl*) self;
1301 
1302     if (self == NULL || key == NULL || value == NULL)
1303     {
1304         PLogError(L("ESR_INVALID_ARGUMENT"));
1305         return ESR_INVALID_ARGUMENT;
1306     }
1307 
1308     return ST_putSpecialKeyValue(impl->symtable, key, value);
1309 
1310 }
1311 
SR_SemanticProcessor_Flush(SR_SemanticProcessor * self)1312 ESR_ReturnCode SR_SemanticProcessor_Flush(SR_SemanticProcessor* self)
1313 {
1314     SR_SemanticProcessorImpl* impl = (SR_SemanticProcessorImpl*) self;
1315 
1316     if (self == NULL)
1317     {
1318         PLogError(L("ESR_INVALID_ARGUMENT"));
1319         return ESR_INVALID_ARGUMENT;
1320     }
1321     return ST_reset_all(impl->symtable);
1322 }
1323