• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  SemanticGraphImpl.c  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include "SR_SemprocPrefix.h"
21 #include "SR_SemprocDefinitions.h"
22 #include "SR_SemanticGraph.h"
23 #include "SR_SemanticGraphImpl.h"
24 #include "SR_SemanticProcessorImpl.h"
25 #include "ESR_ReturnCode.h"
26 #include "passert.h"
27 #include "pendian.h"
28 #include "plog.h"
29 
/* Allocation tag handed to NEW()/NEW_ARRAY() in this file. */
static const char* MTAG = __FILE__;
#define AVG_SCRIPTS_PER_WORD 2.5
#define SLOTNAME_INDICATOR "__"
#define SLOTNAME_INDICATOR_LEN 2

/*
 * Pointer <-> 32-bit index conversion relative to an array base.
 * A NULL pointer maps to the sentinel index 0xFFFFFFFF and back again.
 * Every macro argument is parenthesized so that compound expressions such as
 * PTR_TO_IDX(p, base + 1) or IDX_TO_PTR(i & mask, base) expand as intended.
 */
#define PTR_TO_IDX(ptr, base) ((asr_uint32_t) ((ptr) == NULL ? 0xFFFFFFFFu : \
                               (asr_uint32_t)((ptr) - (base))))
#define IDX_TO_PTR(idx, base) ((idx) == 0xFFFFFFFFu ? NULL : (base) + (idx))
38 
SR_SemanticGraphCreate(SR_SemanticGraph ** self)39 ESR_ReturnCode SR_SemanticGraphCreate(SR_SemanticGraph** self)
40 {
41   SR_SemanticGraphImpl* impl;
42 
43   if (self == NULL)
44   {
45     PLogError(L("ESR_INVALID_ARGUMENT"));
46     return ESR_INVALID_ARGUMENT;
47   }
48   impl = NEW(SR_SemanticGraphImpl, MTAG);
49   if (impl == NULL)
50   {
51     PLogError(L("ESR_OUT_OF_MEMORY"));
52     return ESR_OUT_OF_MEMORY;
53   }
54   /* do not assume NEW initialize impl as zero, do it here */
55   memset(impl, 0, sizeof(SR_SemanticGraphImpl));
56 
57   impl->Interface.destroy = &SR_SemanticGraph_Destroy;
58   impl->Interface.unload = &SR_SemanticGraph_Unload;
59   impl->Interface.load = &SR_SemanticGraph_Load;
60   impl->Interface.save = &SR_SemanticGraph_Save;
61   impl->Interface.addWordToSlot = &SR_SemanticGraph_AddWordToSlot;
62   impl->Interface.reset = &SR_SemanticGraph_Reset;
63   impl->script_olabel_offset = SEMGRAPH_SCRIPT_OFFSET;
64   impl->scopes_olabel_offset = SEMGRAPH_SCOPE_OFFSET;
65 
66   *self = (SR_SemanticGraph*) impl;
67   return ESR_SUCCESS;
68 }
69 
70 
71 /**
72  * Default implementation.
73  */
SR_SemanticGraph_Destroy(SR_SemanticGraph * self)74 ESR_ReturnCode SR_SemanticGraph_Destroy(SR_SemanticGraph* self)
75 {
76   SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
77 
78   if (self == NULL)
79   {
80     PLogError(L("ESR_INVALID_ARGUMENT"));
81     return ESR_INVALID_ARGUMENT;
82   }
83 
84   FREE(impl);
85   return ESR_SUCCESS;
86 }
87 
88 ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp);
89 
90 
91 /* private function */
SR_SemanticGraph_LoadFromImage(SR_SemanticGraph * self,wordmap * ilabels,const LCHAR * g2g)92 ESR_ReturnCode SR_SemanticGraph_LoadFromImage(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* g2g)
93 {
94   SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
95   PFile* fp = NULL;
96   struct
97   {
98     asr_uint32_t rec_context_image_size;
99     /*  image data size of the recognition graph */
100     asr_uint32_t format;
101   }
102   header;
103   ESR_ReturnCode rc = ESR_SUCCESS;
104   ESR_BOOL isLittleEndian;
105   /*
106     #if __BYTE_ORDER==__LITTLE_ENDIAN
107     isLittleEndian = ESR_TRUE;
108     #else
109     isLittleEndian = ESR_FALSE;
110     #endif
111   */
112   isLittleEndian = ESR_TRUE;
113 
114   fp = pfopen ( g2g, L("rb"));
115 /*  CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp));
116   CHKLOG(rc, PFileOpen(fp, L("rb")));*/
117 
118   if ( fp == NULL )
119     goto CLEANUP;
120 
121   /* header */
122   if (pfread(&header, 4, 2, fp) != 2)
123   {
124     rc = ESR_READ_ERROR;
125     PLogError(ESR_rc2str(rc));
126     goto CLEANUP;
127   }
128 
129   if (pfseek(fp, header.rec_context_image_size, SEEK_SET))
130   {
131     rc = ESR_READ_ERROR;
132     PLogError(L("ESR_READ_ERROR: could not seek to semgraph data"));
133     goto CLEANUP;
134   }
135 
136   if (header.format == IMAGE_FORMAT_V2)
137   {
138     rc = sr_semanticgraph_loadV2(impl, ilabels, fp);
139   }
140   else
141   {
142     rc = ESR_INVALID_STATE;
143     PLogError("PCLG.txt P.txt inconsistency");
144     goto CLEANUP;
145   }
146 
147 CLEANUP:
148   if (fp)
149     pfclose (fp);
150   if (rc != ESR_SUCCESS)
151   {
152     if (impl->arc_token_list != NULL)
153     {
154       FREE(impl->arc_token_list);
155       impl->arc_token_list = NULL;
156     }
157   }
158   return rc;
159 }
160 
161 static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
162     PFile* fp);
163 
164 static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
165     PFile* fp);
166 
sr_semanticgraph_loadV2(SR_SemanticGraphImpl * impl,wordmap * ilabels,PFile * fp)167 ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp)
168 {
169   unsigned int i, nfields;
170   ESR_ReturnCode rc = ESR_SUCCESS;
171   struct
172   {
173     asr_uint32_t format;
174     asr_uint32_t sgtype;
175   }
176   header;
177   asr_uint32_t tmp[32];
178 
179   if (pfread(&header, 4/*sz*/, 2/*ni*/, fp) != 2)
180   {
181     rc = ESR_READ_ERROR;
182     PLogError(L("ESR_READ_ERROR: could not read V2"));
183     goto CLEANUP;
184   }
185 
186   if (header.sgtype == GrammarTypeItemList)
187   {
188     /*
189       tmp = new unsigned short[num_words];
190       if( pfread( tmp, sizeof(tmp[0]), num_words, fp) != num_words) {
191       rc = ESR_READ_ERROR;
192       PLogMessage("can't read %d word script assocs\n", num_words);
193       goto CLEANUP;
194       }
195     */
196     /* convert these to an arc_token_list or whatever */
197     PLogError("not supported v2 itemlist type");
198     rc = ESR_INVALID_STATE;
199     goto CLEANUP;
200 
201   }
202   else
203   {
204 
205     nfields = 2;
206     if (pfread(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
207     {
208       rc = ESR_WRITE_ERROR;
209       PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset"));
210       goto CLEANUP;
211     }
212     i = 0;
213     impl->script_olabel_offset = (wordID)tmp[i++];
214     impl->scopes_olabel_offset = (wordID)tmp[i++];
215     ASSERT(i == nfields);
216 
217     /* word arcs */
218     if ((rc = deserializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS)
219     {
220       PLogError(ESR_rc2str(rc));
221       goto CLEANUP;
222     }
223 
224     /* use the ilabels provided externally (from recog graph ilabels) */
225     impl->ilabels = ilabels;
226 
227     /* scopes */
228     if ((rc = deserializeWordMapV2(&impl->scopes_olabels, fp)) != ESR_SUCCESS)
229     {
230       PLogError(ESR_rc2str(rc));
231       goto CLEANUP;
232     }
233 
234     /* scripts */
235     if ((rc = deserializeWordMapV2(&impl->scripts, fp)) != ESR_SUCCESS)
236     {
237       PLogError(ESR_rc2str(rc));
238       goto CLEANUP;
239     }
240   }
241 CLEANUP:
242   return rc;
243 }
244 
245 
get_first_arc_leaving_node1(arc_token * arc_token_list,arcID num_arcs,nodeID node)246 static arc_token_lnk get_first_arc_leaving_node1(arc_token* arc_token_list,
247     arcID num_arcs,
248     nodeID node)
249 {
250   arcID i;
251   for (i = 0; i < num_arcs; i++)
252   {
253     if ((nodeID)(int)arc_token_list[i].next_token_index == node)
254       return ARC_TOKEN_LNK(arc_token_list, i);
255   }
256   return ARC_TOKEN_NULL;
257 }
258 
/*
 * Storage size of `word` including its terminator, rounded up to an even
 * number of chars (same rule as in srec_context.c).
 */
static int strlen_with_null(const char* word)
{
  int total = (int)(strlen(word) + 1);

  return (total % 2 == 1) ? total + 1 : total;
}
265 /* private function */
SR_SemanticGraph_LoadFromTextFiles(SR_SemanticGraph * self,wordmap * ilabels,const LCHAR * basename,int num_words_to_add)266 ESR_ReturnCode SR_SemanticGraph_LoadFromTextFiles(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add)
267 {
268   ESR_ReturnCode rc = ESR_FATAL_ERROR;
269   arcID num_scripts;
270   int isConstString = 0;
271   LCHAR filename[MAX_STRING_LEN];
272   LCHAR line[MAX_SCRIPT_LEN];
273   LCHAR iword[MAX_STRING_LEN];
274   LCHAR oword[MAX_SCRIPT_LEN];
275   LCHAR *p;
276   unsigned int max_num_arc_tokens;
277   nodeID from_node, into_node;
278   wordID ilabel = 0;
279   labelID olabel = 0;
280   arc_token *atoken;
281   arc_token *last_atoken;
282   costdata cost = 0;
283   arcID num_arcs;
284   arc_token* arc_token_list;
285   long fpos;
286   PFile* p_text_file = NULL;
287   PFile* scripts_file;
288   SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self;
289   size_t lineNo;
290   unsigned int i;
291   wordID num_scope_words;
292   asr_int32_t num_scope_chars;
293   LCHAR* _tMp;    /* used by IS_SCOPE_MARKER() below */
294 
295   /* use the ilables that are provided externally (from recog graph ilabels) */
296   semgraph->ilabels = ilabels;
297 
298 
299 
300   /* try to open the .script file */
301   LSTRCPY(filename, basename);
302   LSTRCAT(filename, ".script");
303   scripts_file = pfopen ( filename, L("r") );
304 /*  CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &scripts_file));
305   CHKLOG(rc, PFileOpen(scripts_file, L("r")));*/
306 
307   if ( scripts_file == NULL )
308   {
309     rc = ESR_OPEN_ERROR;
310     goto CLEANUP;
311   }
312 
313   /* Load the scripts file
314     assumptions:
315 
316   - the scripts file has each line ordered starting from 0 as such
317   <integer><space><script>
318 
319   - the integer MUST become the index of the script in the wordmap
320 
321   - output labels referenced in the semgraph are the integers (wordmap index) prepending with '_'
322 
323   - output labels stored in the semgraph are actually integers which are equal to
324     script_olabel_offset + <integer>
325   */
326 
327   /* determine number of words/chars to allocate */
328   fpos = pftell(scripts_file);
329   for (i = num_scripts = 0; pfgets(line, MAX_SCRIPT_LEN, scripts_file); num_scripts++)
330   {
331     size_t len = LSTRLEN(line) + 1;
332     if (len % 2) len++;
333     i = i + len; /* count the chars */
334   }
335   pfseek(scripts_file, fpos, SEEK_SET);
336 
337   /* on each line I will have 1 big word */
338   /* figure that each script for dynamically added words will be a simple assignment
339      like myVar='someVal' ... which looks like almost 2.5 words, hence *2.5 */
340   wordmap_create(&semgraph->scripts, i, num_scripts, (int)AVG_SCRIPTS_PER_WORD*num_words_to_add);
341 
342   /* load up all the information */
343   lineNo = 0;
344   while (pfgets(line, MAX_SCRIPT_LEN, scripts_file))
345   {
346     ASSERT( sizeof( iword[0]) == sizeof(char)); // else more code to write!
347     if (sscanf(line, "%s ", iword) == 1)
348     {
349       LSTRCPY(oword, line + LSTRLEN(iword) + 1);
350       /* may actually have spaces in it and this is messing me up ... here is the fix */
351       /* copy the line starting after the iword */
352       for (i = 0, p = line + LSTRLEN(iword) + 1; *p; p++)
353       {
354         if (*p == '\\')
355         {
356           if (isConstString)
357             oword[i++] = *p;
358           ++p;
359         }
360         else if (*p == '\'')
361           isConstString = (isConstString ? 0 : 1) ; /* toggle */
362         if (isConstString || !isspace(*p))
363           oword[i++] = *p;
364       }
365       oword[i] = '\0';
366 
367       /* make sure that the index in the wordmap matches the line number */
368       if (wordmap_add_word(semgraph->scripts, oword) != lineNo)
369       {
370         PLogError(L("ESR_READ_ERROR: internal error adding script (%d)"), num_words_to_add);
371         return ESR_NO_MATCH_ERROR;
372       }
373       lineNo++;
374     }
375     else
376     {
377       PLogMessage(L("can't parse line %s"), line);
378       passert(0);
379     }
380   }
381   pfclose (scripts_file);
382 
383   /* try to open the P.txt file */
384   LSTRCPY(filename, basename);
385   LSTRCAT(filename, ".P.txt");
386   p_text_file = pfopen ( filename, L("r"));
387 /*  CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &p_text_file));
388   CHKLOG(rc, PFileOpen(p_text_file, L("r")));*/
389 
390   if ( p_text_file == NULL )
391     goto CLEANUP;
392 
393   /* determine number of word arcs to allocate */
394   fpos = pftell(p_text_file);
395   num_scope_words = 0;
396   num_scope_chars = 0;
397   for (num_arcs = 0; pfgets(line, MAX_STRING_LEN, p_text_file); ++num_arcs)
398   {
399     if (num_arcs == MAXarcID)
400       break; /* error */
401 	if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4)
402     {
403 		if (IS_SCOPE_MARKER(oword)) {
404 			num_scope_words++;
405 			num_scope_chars += strlen_with_null( oword);
406 			if(num_scope_chars) num_scope_chars++ ;
407   }
408 	}
409   }
410   max_num_arc_tokens = num_arcs + (arcID)num_words_to_add;
411   MEMCHK(rc, max_num_arc_tokens, MAXarcID);
412   pfseek(p_text_file, fpos, SEEK_SET);
413 
414   semgraph->arc_token_list = NEW_ARRAY(arc_token,max_num_arc_tokens, L("semgraph.wordgraph"));
415   arc_token_list = semgraph->arc_token_list;
416   /* need to initialize my wordmap */
417   wordmap_create(&semgraph->scopes_olabels, num_scope_chars, num_scope_words,0); // max_num_arc_tokens);
418 
419   /* 1. first load up all the information */
420   i = 0;
421   while (pfgets(line, MAX_STRING_LEN, p_text_file))
422   {
423     if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4)
424     {
425       /* the cost is 0 by default */
426       cost = 0;
427       /* since I am reading strings, and I want to store integers, I need to get
428       the index of the string by looking up in the ilabels wordmap */
429       ilabel = wordmap_find_index(ilabels, iword);
430 
431       /* now for the olabels, depending on the type of the label, I either use the index directly
432       or save the index in a wordmap which will eventually give me the right index.
433       Remember that the index must be offset by a certain value depending on which wordmap I'm using */
434 
435       if (IS_SCRIPT_MARKER(oword)) /* olabel type: script */
436       {
437         olabel = (labelID) atoi(&oword[1]);
438         olabel = (wordID)(olabel + semgraph->script_olabel_offset); /* the offset */
439       }
440       else if (IS_SCOPE_MARKER(oword)) /* olabel type: scope marker */
441       {
442         /* check if the label is already in the wordmap, and reuse index */
443         olabel = wordmap_find_index(semgraph->scopes_olabels, oword);
444 
445         if (olabel == MAXwordID) /* not found so add to wordmap and get new index */
446           olabel = wordmap_add_word(semgraph->scopes_olabels, oword);
447         olabel = (wordID)(olabel + semgraph->scopes_olabel_offset); /* the offset */
448       }
449       else /* olabel type: input symbols hopefully !!! */
450       {
451 	/* if oword does not have a \t in the end, add a \t*/
452 
453         /* check if the label is already in the wordmap, and reuse index */
454         olabel = wordmap_find_index(ilabels, oword);
455 
456         if (olabel == MAXwordID) /* not found so add to wordmap and get new index */
457           PLogMessage(L("output label not found: %s"), oword);
458       }
459 
460     }
461     else if (sscanf(line, "%hu", &from_node) == 1)
462     {
463       into_node = MAXnodeID;
464       ilabel = MAXwordID;
465       olabel = MAXwordID;
466       cost = 0;
467     }
468     else
469     {
470       PLogMessage(L("can't parse line %s"), line);
471       passert(0);
472     }
473 
474     /* okay, now that I have the data for the current arc, save it to the arc_token data structure*/
475     atoken = &arc_token_list[i];
476     ++i;
477 
478     atoken->ilabel = ilabel;
479     atoken->olabel = olabel;
480     /* atoken->cost = cost; not used for now */
481 
482     /* initially this stores INTEGERS !!! , I need to cross-reference the integers with the
483     appropriate arc_token pointers (in the next steps for the algorithm) */
484     atoken->first_next_arc = (arc_token_lnk)into_node;
485     atoken->next_token_index = (arc_token_lnk)from_node;
486   }
487   num_arcs = (arcID) i;
488 
489   pfclose(p_text_file);
490   p_text_file = NULL;
491 
492   wordmap_setbase(semgraph->scopes_olabels);
493   wordmap_ceiling(semgraph->scopes_olabels); /* we won't be adding scopes! */
494   wordmap_setbase(semgraph->scripts);
495 
496   /* 2. now do the internal cross references */
497   /* in this pass we build the 1-to-1 links, and n-to-1 links in a graph */
498   /* in other words... first_next_arc points to the first arc leaving the node */
499   for (i = 0; i < num_arcs; ++i)
500   {
501     atoken = &arc_token_list[i];
502     into_node = (nodeID)(int)atoken->first_next_arc; /* get the integer */
503     atoken->first_next_arc = /* converts the integer id to a arc_token pointer */
504       get_first_arc_leaving_node1(arc_token_list, num_arcs, (nodeID)(int)atoken->first_next_arc);
505   }
506 
507   /* 3. now do more internal cross refs */
508   /* in this pass we build the 1-to-n links */
509   /* in other words ... setup the linked list of all arc leaving from the same node */
510   last_atoken = &arc_token_list[0];
511   for (i = 1; i < num_arcs; ++i)
512   {
513     atoken = &arc_token_list[i];
514     /* if this arc and the last one do NOT leave the same node (i.e. from_node, see above),
515     then the next_token_index is not used */
516     if (atoken->next_token_index != last_atoken->next_token_index)
517       last_atoken->next_token_index = ARC_TOKEN_NULL;
518     else
519       last_atoken->next_token_index = ARC_TOKEN_LNK(arc_token_list, i);
520     last_atoken = atoken;
521   }
522   last_atoken->next_token_index = ARC_TOKEN_NULL;
523 
524 #if DEBUG_ASTAR
525   /* under debug, it's nice to be able to see the words leaving the
526      destination node, they are stored sequentially in the debug ary */
527   for (i = 0; i < num_arcs; i++)
528   {
529     LCHAR * p;
530     arc_token* tmp;
531     atoken = &arc_token_list[i];
532     atoken->debug[0] = 0;
533     tmp = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc);
534     for (; tmp; tmp = ARC_TOKEN_PTR(arc_token_list, tmp->next_token_index))
535     {
536       if (tmp->first_next_arc == ARC_TOKEN_NULL)
537         p = "END";
538       else if (!tmp->label)
539         p = "NULL";
540       else
541         p = tmp->label;
542       if (strlen(atoken->debug) + strlen(p) + 6 < 64)
543       {
544         strcat(atoken->debug, p);
545         strcat(atoken->debug, " ");
546       }
547       else
548       {
549         strcat(atoken->debug, "...");
550         break;
551       }
552     }
553   }
554 #endif
555   semgraph->arc_token_list_len = (arcID)max_num_arc_tokens;
556   /* initialize the freelist */
557   if (num_arcs < max_num_arc_tokens)
558   {
559     semgraph->arc_token_freelist = &semgraph->arc_token_list[num_arcs];
560     for (i = num_arcs; i < max_num_arc_tokens - 1; i++)
561     {
562       semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
563       semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(semgraph->arc_token_list, (i + 1));
564     }
565     semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
566     semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_NULL;
567   }
568   else
569     semgraph->arc_token_freelist = NULL;
570 
571   /* for dynamic addition */
572   for (i = 0; i < MAX_NUM_SLOTS; i++)
573     semgraph->arcs_for_slot[i] = NULL;
574 
575 	semgraph->arc_token_insert_start = semgraph->arc_token_list + num_arcs;
576     semgraph->arc_token_insert_end = NULL;
577   return ESR_SUCCESS;
578 CLEANUP:
579   if (p_text_file)
580     pfclose (p_text_file);
581   return rc;
582 }
583 
SR_SemanticGraph_Load(SR_SemanticGraph * self,wordmap * ilabels,const LCHAR * basename,int num_words_to_add)584 ESR_ReturnCode SR_SemanticGraph_Load(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add)
585 {
586   ESR_ReturnCode rc;
587 
588   if (LSTRSTR(basename, L(".g2g")))
589   {
590     rc = SR_SemanticGraph_LoadFromImage(self, ilabels, basename);
591   }
592   else
593   {
594     rc = SR_SemanticGraph_LoadFromTextFiles(self, ilabels, basename, num_words_to_add);
595   }
596   return rc;
597 }
598 
599 /**
600  * Unload Sem graph
601  */
SR_SemanticGraph_Unload(SR_SemanticGraph * self)602 ESR_ReturnCode SR_SemanticGraph_Unload(SR_SemanticGraph* self)
603 {
604   SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self;
605 
606   /* see the wordmap_create in the Load function */
607   wordmap_destroy(&semgraph->scopes_olabels);
608   wordmap_destroy(&semgraph->scripts);
609 
610   FREE(semgraph->arc_token_list);
611   semgraph->arc_token_list = 0;
612   return ESR_SUCCESS;
613 }
614 
615 ESR_ReturnCode sr_semanticgraph_saveV1(SR_SemanticGraphImpl* impl, const LCHAR* g2g);
616 ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g);
617 
SR_SemanticGraph_Save(SR_SemanticGraph * self,const LCHAR * g2g,int version_number)618 ESR_ReturnCode SR_SemanticGraph_Save(SR_SemanticGraph* self, const LCHAR* g2g, int version_number)
619 {
620   SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
621   ESR_ReturnCode rc = ESR_SUCCESS;
622 
623   if (version_number == 2)
624   {
625     rc = sr_semanticgraph_saveV2(impl,  g2g);
626   }
627   else
628   {
629     PLogError("invalid version_number %d\n", version_number);
630     rc = ESR_INVALID_ARGUMENT;
631   }
632   return rc;
633 }
634 
635 
/**
 * Classifies the graph as an item list or a general BNF grammar.
 *
 * An item list has this shape:
 *     0 1 eps {
 *     1 2 13e_avenue myRoot}
 *     ...
 *     1 2 13e_avenue myRoot}
 *     2
 * i.e. an epsilon arc, then sibling arcs that all lead to the same successor
 * and carry consecutive ilabels from NUM_ITEMLIST_HDRWDS up to the number of
 * input words, then a sibling-free chain of epsilon or end arcs.
 *
 * @param impl Graph to inspect
 * @return GrammarTypeItemList when the shape matches, GrammarTypeBNF otherwise
 */
int sr_semanticgraph_get_type(SR_SemanticGraphImpl* impl)
{
  arc_token* tokens = impl->arc_token_list;
  arc_token* cur = tokens;
  arc_token_lnk merge_link;
  int next_word;

  /* the graph must start with an epsilon arc */
  if (cur->ilabel != WORD_EPSILON_LABEL)
    return GrammarTypeBNF;

  cur = ARC_TOKEN_PTR(tokens, cur->first_next_arc);
  if (cur == NULL)
    return GrammarTypeBNF;

  /* every word arc must merge at the same place and be numbered in order */
  merge_link = cur->first_next_arc;
  next_word = NUM_ITEMLIST_HDRWDS;
  while (cur != NULL)
  {
    if (cur->first_next_arc != merge_link)
      return GrammarTypeBNF;
    if (cur->ilabel != next_word)
      return GrammarTypeBNF;
    next_word++;
    cur = ARC_TOKEN_PTR(tokens, cur->next_token_index);
  }
  if (next_word != impl->ilabels->num_words)
    return GrammarTypeBNF;

  /* after the merge point: a single chain of epsilon arcs or end markers */
  cur = ARC_TOKEN_PTR(tokens, merge_link);
  while (cur != NULL)
  {
    if (cur->next_token_index != ARC_TOKEN_NULL)
      return GrammarTypeBNF;
    if (cur->ilabel != WORD_EPSILON_LABEL &&
        (cur->ilabel != MAXwordID || cur->olabel != MAXwordID))
      return GrammarTypeBNF;
    cur = ARC_TOKEN_PTR(tokens, cur->first_next_arc);
  }
  return GrammarTypeItemList;
}
676 
677 #define SEMGR_OUTPUT_FORMAT_V2 478932784
678 
sr_semanticgraph_saveV2(SR_SemanticGraphImpl * impl,const LCHAR * g2g)679 ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g)
680 {
681   ESR_ReturnCode rc;
682   PFile* fp;
683   asr_uint32_t tmp[32];
684   struct
685   {
686     asr_uint32_t format;
687     asr_uint32_t sgtype;
688   }
689   header;
690   unsigned int i, nfields;
691 
692   fp = pfopen ( g2g, L("r+b"));
693 /*  CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp));
694   CHKLOG(rc, PFileOpen(fp, L("r+b")));*/
695 
696   if ( fp == NULL )
697   {
698   	rc = ESR_OPEN_ERROR;
699     goto CLEANUP;
700   }
701 
702   pfseek(fp, 0, SEEK_END);
703 
704   header.format = IMAGE_FORMAT_V2;
705   header.sgtype = sr_semanticgraph_get_type(impl);
706   header.sgtype = GrammarTypeBNF;
707 
708 #ifdef SREC_ENGINE_VERBOSE_LOGGING
709   PLogMessage("sr_semanticgraph_saveV2() semgraphtype %d", header.sgtype);
710 #endif
711   if (pfwrite(&header, 4 /*sz*/, 2/*ni*/, fp) != 2)
712   {
713     rc = ESR_WRITE_ERROR;
714     PLogError(L("ESR_WRITE_ERROR: could not write V2"));
715     goto CLEANUP;
716   }
717 
718   if (header.sgtype == GrammarTypeItemList)
719   {
720     arc_token *parser, *atok;
721 
722     /* write num_words size array of short script ids
723        this might be just a y=x array, but it could be there
724        are synonyms, eg. NEW_YORK NEW_YORK_CITY -> same script
725     */
726     parser = impl->arc_token_list;
727     parser = ARC_TOKEN_PTR(impl->arc_token_list, parser->first_next_arc);
728     for (i = NUM_ITEMLIST_HDRWDS; i < impl->ilabels->num_words; i++)
729     {
730       for (atok = parser; atok; atok = ARC_TOKEN_PTR(impl->arc_token_list, atok->next_token_index))
731       {
732         if (atok->ilabel == i) break;
733       }
734       if (!atok)
735       {
736         rc = ESR_INVALID_STATE;
737         PLogError("Can't find word %d in semgraph\n", i);
738         goto CLEANUP;
739       }
740       tmp[0] = atok->olabel;
741       if (pfwrite(tmp, sizeof(tmp[0]), 1, fp) != 1)
742       {
743         rc = ESR_WRITE_ERROR;
744         PLogError(L("ESR_WRITE_ERROR: could not write V2"));
745         goto CLEANUP;
746       }
747     }
748     if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS)
749     {
750       PLogError(ESR_rc2str(rc));
751       goto CLEANUP;
752     }
753   }
754   else
755   {
756 
757     i = 0;
758     tmp[i++] = impl->script_olabel_offset;
759     tmp[i++] = impl->scopes_olabel_offset;
760     nfields = i;
761 
762     if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
763     {
764       rc = ESR_WRITE_ERROR;
765       PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset"));
766       goto CLEANUP;
767     }
768 
769     /* word arcs */
770     if ((rc = serializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS)
771     {
772       PLogError(ESR_rc2str(rc));
773       goto CLEANUP;
774     }
775 
776     /* do not WRITE ilabels... this is a ref to the olabels from rec context */
777 
778     /* scopes */
779     if ((rc = serializeWordMapV2(impl->scopes_olabels, fp)) != ESR_SUCCESS)
780     {
781       PLogError(ESR_rc2str(rc));
782       goto CLEANUP;
783     }
784 
785     if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS)
786     {
787       PLogError(ESR_rc2str(rc));
788       goto CLEANUP;
789     }
790 #ifdef SREC_ENGINE_VERBOSE_LOGGING
791     PLogMessage("G2G done WR semg %d", pftell(fp));
792 #endif
793   }
794   rc = ESR_SUCCESS;
795 CLEANUP:
796   if (fp)
797     pfclose (fp);
798   return rc;
799 }
800 
/*
 * Walks the sibling chain starting at arc_token_list (linked through
 * next_token_index, resolved against base) and returns the first token
 * whose ilabel equals the given one, or NULL if the chain has none.
 */
arc_token* arc_tokens_find_ilabel(arc_token* base, arc_token* arc_token_list, wordID ilabel)
{
  arc_token* cur = arc_token_list;

  while (cur != NULL)
  {
    if (cur->ilabel == ilabel)
      return cur;
    cur = ARC_TOKEN_PTR(base, cur->next_token_index);
  }
  return NULL;
}
808 
/*
 * Pops the head of the free list and returns it with its labels zeroed and
 * both links cleared. Returns NULL when the free list is empty.
 */
arc_token* arc_tokens_get_free(arc_token* base, arc_token** arc_token_freelist)
{
  arc_token* taken = *arc_token_freelist;

  if (taken != NULL)
  {
    /* advance the free list before the link is wiped */
    *arc_token_freelist = ARC_TOKEN_PTR(base, taken->next_token_index);

    taken->olabel = 0;
    taken->ilabel = 0;
    taken->next_token_index = ARC_TOKEN_NULL;
    taken->first_next_arc = ARC_TOKEN_NULL;
  }
  return taken;
}
820 
/*
 * Counts the tokens in the chain starting at head, following
 * next_token_index links resolved against base. A NULL head yields 0.
 */
int arc_tokens_list_size(arc_token* base, arc_token* head)
{
  int length = 0;
  arc_token* cur = head;

  while (cur != NULL)
  {
    ++length;
    cur = ARC_TOKEN_PTR(base, cur->next_token_index);
  }
  return length;
}
831 
/*
 * Returns the whole chain starting at head to the free list: the chain's
 * last token is linked to the current free-list head, and head becomes the
 * new free-list head. Does nothing when head is NULL.
 */
void arc_tokens_free_list(arc_token* base, arc_token** arc_token_freelist, arc_token* head)
{
  arc_token* last;
  arc_token* following;

  if (!head)
    return;

  /* find the last token of the chain */
  last = head;
  while ((following = ARC_TOKEN_PTR(base, last->next_token_index)) != NULL)
    last = following;

  last->next_token_index = ARC_TOKEN_PTR2LNK(base, (*arc_token_freelist));
  *arc_token_freelist = head;
}
845 
find_in_union_of_scripts(const LCHAR * union_script,const LCHAR * script,ESR_BOOL * isFound)846 ESR_ReturnCode find_in_union_of_scripts(const LCHAR* union_script, const LCHAR* script, ESR_BOOL* isFound)
847 {
848   const LCHAR* start;
849   const LCHAR* end;
850   const LCHAR* p;
851   const LCHAR* q;
852 
853   if (union_script == NULL || script == NULL)
854     return ESR_INVALID_ARGUMENT;
855 
856   start = LSTRCHR(union_script, L('\''));
857   if (start == NULL)
858     return ESR_INVALID_ARGUMENT;
859 
860   start++; /* point to first char after \' */
861 
862   end = LSTRCHR(start, L('\'')); /* point to last \' */
863   if (end == NULL)
864     return ESR_INVALID_ARGUMENT;
865 
866   p = start;
867 
868   start = LSTRCHR(script, L('\''));
869   if (start == NULL)
870     return ESR_INVALID_ARGUMENT;
871   start++; /* point to first char after \' */
872 
873   q = start;
874 
875   while (p < end)
876   {
877     if (*p == MULTIPLE_MEANING_JOIN_CHAR) /* if at the end of a meaning (not end of union)
878                                                   and p matched q all the way up to join char then found! */
879     {
880       *isFound = ESR_TRUE;
881       return ESR_SUCCESS;
882     }
883     else if (*p == *q) /* while same keep going */
884     {
885       if (*p == *(end - 1)) /* if at the end and p matched q all the way then found! */
886       {
887         *isFound = ESR_TRUE;
888         return ESR_SUCCESS;
889       }
890       q++;
891     }
892     else /* skip to next meaning after join char */
893     {
894       while (*p != MULTIPLE_MEANING_JOIN_CHAR && p < end)
895         p++;
896       /* reset q */
897       q = start;
898     }
899     p++;
900   }
901 
902   *isFound = ESR_FALSE;
903   return ESR_SUCCESS;
904 }
905 
906 #define QUOTE_CHAR L('\'')
count_num_literals(const LCHAR * a,const LCHAR ** start_points,int max_num_start_points)907 int count_num_literals(const LCHAR* a, const LCHAR** start_points, int max_num_start_points)
908 {
909   int num = 0;
910   const LCHAR *p, *q = a;
911   const LCHAR *end = a + LSTRLEN(a);
912   while (1)
913   {
914     /* look for starting QUOTE_CHAR */
915     for (p = q; p < end; p++)
916     {
917       if (*p == ESC_CHAR) p++;
918       else if (*p == QUOTE_CHAR) break;
919     }
920     if (p == end) break;
921     if (num > max_num_start_points) break; /* just abort the counting! */
922     start_points[num] = p;
923     /* look for ending QUOTE_CHAR */
924     for (q = p + 1; q < end; q++)
925     {
926       if (*q == ESC_CHAR) q++;
927       else if (*q == QUOTE_CHAR) break;
928     }
929     if (q == end) /* does not close! */
930       return -1;
931     p = ++q;
932     num++;
933   }
934   return num;
935 }
/**
 * Builds the union of two quoted literals.
 *
 * Given a pointing at the opening quote of 'X' and b pointing at the opening
 * quote of 'Y', writes 'X' MULTIPLE_MEANING_JOIN_CHAR 'Y' (inside one pair of
 * quotes) followed by a terminator into o.
 *
 * @param o [out] Destination for the merged literal
 * @param a Points at the opening quote of the first literal
 * @param b Points at the opening quote of the second literal
 * @param pptra [out] Set to the position in a just past the first literal's closing quote
 * @return always 0
 */
int union_literal_pair(LCHAR* o, LCHAR* a, LCHAR* b, LCHAR** pptra)
{
  /* both limits are taken before anything is written to o */
  LCHAR* const a_limit = a + LSTRLEN(a);
  LCHAR* const b_limit = b + LSTRLEN(b);
  LCHAR* a_close = a + 1;
  LCHAR* b_close = b + 1;
  LCHAR* src;
  LCHAR* dst = o;

  /* locate the closing quote of a's literal, stepping over escaped characters */
  while (a_close < a_limit)
  {
    if (*a_close == ESC_CHAR)
      a_close++;
    else if (*a_close == QUOTE_CHAR)
      break;
    a_close++;
  }

  /* locate the closing quote of b's literal the same way */
  while (b_close < b_limit)
  {
    if (*b_close == ESC_CHAR)
      b_close++;
    else if (*b_close == QUOTE_CHAR)
      break;
    b_close++;
  }

  /* emit: quote, body of a, join char, body of b, quote, terminator */
  *dst++ = QUOTE_CHAR;
  src = a + 1;
  while (src < a_close)
    *dst++ = *src++;
  *dst++ = MULTIPLE_MEANING_JOIN_CHAR;
  src = b + 1;
  while (src < b_close)
    *dst++ = *src++;
  *dst++ = QUOTE_CHAR;
  *dst++ = 0;

  *pptra = a_close + 1;
  return 0;
}
965 
966 /* now handles n1='52';n2='62'; UNION n1='53';nx='63'; */
967 
make_union_of_scripts(LCHAR * union_script,const size_t max_len,const LCHAR * a,const LCHAR * b)968 ESR_ReturnCode make_union_of_scripts(LCHAR* union_script, const size_t max_len, const LCHAR* a, const LCHAR* b)
969 {
970   int i, num_literals_in_a, num_literals_in_b;
971   LCHAR *spa[8], *spb[8], *spo[8], *ptra;
972 
973   if (a == NULL || b == NULL)
974     return ESR_INVALID_ARGUMENT;
975 
976   num_literals_in_a = count_num_literals(a, (const LCHAR **)spa, 8);
977   num_literals_in_b = count_num_literals(b, (const LCHAR **)spb, 8);
978 
979   if (num_literals_in_a == 0 && num_literals_in_b == 0)
980   {
981     if (LSTRLEN(a) > max_len) return ESR_BUFFER_OVERFLOW;
982     else
983     {
984       LSTRCPY(union_script, a);
985       return ESR_SUCCESS;
986     }
987   }
988   else if (num_literals_in_a != num_literals_in_b)
989   {
990     return ESR_INVALID_ARGUMENT;
991   }
992 
993   /* V='Springfield_IL' union V='Springfield_MA' is V='Springfield_IL#Springfield_MA' */
994   /* 18               +       18          -2     =  33 + 1 for NULL             */
995   if ((LSTRLEN(a) + LSTRLEN(b) - 2) > max_len)
996   {
997     PLogError("Temp buffer (size %d) to hold union of multiple meanings (size %d) is too small", max_len, (LSTRLEN(a) + LSTRLEN(b) - 2));
998     return ESR_BUFFER_OVERFLOW;
999   }
1000 
1001   LSTRCPY(union_script, a);
1002   for (i = 0; i < num_literals_in_a; i++)
1003   {
1004     count_num_literals(union_script, (const LCHAR **)spo, 8);
1005     /* here union_script is n0='52';n1='62'; */
1006     union_literal_pair(spo[i], spa[i], spb[i], &ptra);
1007 #ifdef _WIN32
1008     if (LSTRLEN(spo[i]) > MAX_SEMPROC_VALUE)
1009       pfprintf(PSTDOUT, "Warning: won't be able to parse this script! len %d>%d %s\n", LSTRLEN(spo[i]), MAX_SEMPROC_VALUE, spo[i]);
1010 #endif
1011     /* here union_script is n0='52#53' */
1012     LSTRCAT(union_script, ptra);
1013     /* here union_script is n0='52#53';n1='62'; */
1014   }
1015   return ESR_SUCCESS;
1016 }
1017 
1018 /**
1019  * Default implementation.
1020  */
SR_SemanticGraph_AddWordToSlot(SR_SemanticGraph * self,const LCHAR * _slot,const LCHAR * word,const LCHAR * script,const ESR_BOOL newWordAddedToFST)1021 ESR_ReturnCode SR_SemanticGraph_AddWordToSlot(SR_SemanticGraph* self, const LCHAR* _slot, const LCHAR* word, const LCHAR* script, const ESR_BOOL newWordAddedToFST)
1022 {
1023   struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self;
1024   arc_token *token, *tmp;
1025   arc_token *tmp_arc_token_list;
1026   wordID wdID, scriptID, old_scriptID;
1027   wordID slotID;
1028   LCHAR union_script[MAX_STRING_LEN]; /* sizeof used elsewhere */
1029   ESR_ReturnCode rc; int i;
1030   int tmp_arc_token_list_len;
1031   int offset;
1032 #define MAX_WORD_LEN 128
1033   char veslot[MAX_WORD_LEN];
1034 
1035   if (script == NULL || *script == L('\0') || !LSTRCMP(script, L("NULL")))
1036     return ESR_SUCCESS; /* no script to add so keep going */
1037 
1038   /* find out if the word I am adding already exists. If it already exists, then that means that I
1039      potentially am adding an alternate meaning for the word */
1040   /* the slotname in .PCLG.txt and .map files use __ as the indicator. Xufang */
1041   if(_slot[0] == '@') {
1042     strcpy(veslot,SLOTNAME_INDICATOR);
1043     strcat(veslot,_slot+1);
1044     strcat(veslot,SLOTNAME_INDICATOR);
1045   } else
1046     strcpy(veslot, _slot);
1047 
1048   slotID = wordmap_find_rule_index(impl->ilabels, veslot);
1049   if (slotID == MAXwordID)
1050   {
1051     PLogError(L("ESR_NO_MATCH_ERROR: Could not find slotID in wordmap %s"), _slot);
1052     return ESR_NO_MATCH_ERROR;
1053   }
1054   wdID = wordmap_find_index_in_rule(impl->ilabels, word, slotID);
1055   if (wdID == MAXwordID)
1056   {
1057     PLogError(L("ESR_NO_MATCH_ERROR: Could not find wordID/slotID in wordmap %s/%d"), word, slotID);
1058     return ESR_NO_MATCH_ERROR;
1059   }
1060 
1061   /* **this is an optimization step** */
1062   /* Is word already added in this slot? if so, get the token pointer, else, token is NULL
1063    *
1064    * the assumption is that FST_AddWordToGrammar will tell us if this word was newly added in the FST, or
1065    * if the word was added at least 1 iteration ago, meaning that I have already added it to my
1066    * semgraph slot at some earlier point
1067    */
1068   if (newWordAddedToFST)
1069     token = NULL;
1070   else
1071     token = arc_tokens_find_ilabel(impl->arc_token_list, impl->arcs_for_slot[slotID], wdID);
1072 
1073 #define FST_GROW_FACTOR   12/10
1074 #define FST_GROWARCS_MIN    100
1075   if (token == NULL) /* new word to add to slot */
1076   {
1077     /* add the script if new  */
1078     scriptID = wordmap_find_index(impl->scripts, script);
1079     if (scriptID == MAXwordID)
1080       scriptID = wordmap_add_word(impl->scripts, script);
1081     if (scriptID == MAXwordID)
1082     {
1083       PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap"));
1084       return ESR_OUT_OF_MEMORY;
1085     }
1086 
1087     token = impl->arcs_for_slot[slotID];
1088     tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist));
1089     if (tmp == NULL)
1090       {
1091 #if defined (FST_GROW_FACTOR)
1092 	tmp_arc_token_list_len = impl->arc_token_list_len * FST_GROW_FACTOR;
1093 	if(tmp_arc_token_list_len - impl->arc_token_list_len <=FST_GROWARCS_MIN)
1094 	  tmp_arc_token_list_len+=FST_GROWARCS_MIN;
1095 
1096 	tmp_arc_token_list= NEW_ARRAY(arc_token,tmp_arc_token_list_len, L("semgraph.wordgraph"));
1097 	if(!tmp_arc_token_list) {
1098 	  PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of semgraph.wordgraph"));
1099 	  return ESR_OUT_OF_MEMORY;
1100 	}
1101 	memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token));
1102 
1103 	for(i=0; i<MAX_NUM_SLOTS;i++)
1104 	  {
1105 	    if(impl->arcs_for_slot[i] != NULL) {
1106 	      offset = impl->arcs_for_slot[i] - impl->arc_token_list;
1107 	      impl->arcs_for_slot[i] = tmp_arc_token_list + offset;
1108 	    }
1109 	  }
1110 	token = impl->arcs_for_slot[slotID];
1111 
1112 	ASSERT( impl->arc_token_freelist == NULL);
1113 
1114 	impl->arc_token_freelist = tmp_arc_token_list + impl->arc_token_list_len;
1115 
1116 	FREE(impl->arc_token_list);
1117 	impl->arc_token_insert_start = tmp_arc_token_list + (impl->arc_token_insert_start - impl->arc_token_list); //Rabih fix
1118 	impl->arc_token_list = tmp_arc_token_list;
1119 
1120 	for (i = impl->arc_token_list_len; i < tmp_arc_token_list_len - 1; i++)
1121 	  {
1122 	    impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
1123 	    impl->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, (i + 1));
1124 	  }
1125 	impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
1126 	impl->arc_token_list[i].next_token_index = ARC_TOKEN_NULL;
1127 
1128 	impl->arc_token_list_len = tmp_arc_token_list_len;
1129 	tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist));
1130       }
1131 #endif
1132     if(tmp == NULL) {
1133       PLogError(L("ESR_OUT_OF_MEMORY: Error adding more arcs to graph\n"));
1134       return ESR_OUT_OF_MEMORY;
1135     }
1136     impl->arcs_for_slot[slotID] = tmp;
1137     tmp->next_token_index = ARC_TOKEN_PTR2LNK(impl->arc_token_list, token);
1138     tmp->ilabel = wdID;
1139     tmp->olabel = (wordID)(impl->script_olabel_offset + scriptID);
1140   }
1141   else
1142   {
1143     old_scriptID = token->olabel - impl->script_olabel_offset;
1144 
1145     if (!LSTRCMP(impl->scripts->words[old_scriptID], script))
1146     {
1147       /* nothing to do, we have the word, same meaning again so do nothing */
1148     }
1149     else
1150     {
1151 
1152       CHKLOG(rc, make_union_of_scripts(union_script, sizeof(union_script), impl->scripts->words[old_scriptID], script));
1153 
1154 #ifdef SREC_ENGINE_VERBOSE_LOGGING
1155       PLogMessage(L("Adding alternate meaning %s for word %s (%s) in slot %s\n"), script, word,
1156                   impl->scripts->words[old_scriptID], impl->ilabels->words[slotID]);
1157 #endif
1158       /* add the union as if new (if not already there) */
1159       scriptID = wordmap_find_index(impl->scripts, union_script);
1160       if (scriptID == MAXwordID)
1161         scriptID = wordmap_add_word(impl->scripts, union_script);
1162       if (scriptID == MAXwordID)
1163       {
1164         PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap"));
1165         return ESR_OUT_OF_MEMORY;
1166       }
1167 
1168       /* make the olabel point to the union */
1169       token->olabel = (wordID)(impl->script_olabel_offset + scriptID);
1170     }
1171   }
1172   return ESR_SUCCESS;
1173 CLEANUP:
1174   return rc;
1175 }
1176 
1177 
1178 /**
1179  * Default implementation.
1180  */
SR_SemanticGraph_Reset(SR_SemanticGraph * self)1181 ESR_ReturnCode SR_SemanticGraph_Reset(SR_SemanticGraph* self)
1182 {
1183   struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self;
1184   wordID slotid;
1185   arc_token* tmp;
1186   arc_token *tmp_arc_token_list;
1187 
1188   wordmap_reset(impl->scopes_olabels);
1189   wordmap_reset(impl->scripts);
1190   wordmap_reset(impl->ilabels);   //Rabih: I added this
1191   for (slotid = 1; slotid < impl->ilabels->num_slots; slotid++)
1192   {
1193     tmp = impl->arcs_for_slot[slotid];
1194     arc_tokens_free_list(impl->arc_token_list, &(impl->arc_token_freelist), tmp);
1195     impl->arcs_for_slot[slotid] = NULL;
1196 #if defined(SANITY_CHECK)
1197     int count;
1198     for (count = 0, tmp = impl->arc_token_freelist; tmp != NULL;
1199          tmp = ARC_TOKEN_PTR(impl->arc_token_list, tmp->next_token_index))
1200     {
1201       ASSERT(tmp->ilabel != 79324);
1202       tmp->ilabel = 79324;
1203       count++;
1204     }
1205     PLogError("after reset freelist size is %d", count);
1206 #endif
1207   }
1208 
1209   // Rabih : Reset the arc_token_list
1210   if(impl->ilabels->num_words == impl->ilabels->num_base_words)
1211   {}
1212   else{
1213   impl->arc_token_list_len = (size_t)(impl->arc_token_insert_start - impl->arc_token_list);
1214   tmp_arc_token_list= NEW_ARRAY(arc_token,impl->arc_token_list_len, L("semgraph.wordgraph"));
1215   memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token));
1216 
1217   impl->arc_token_freelist = NULL;
1218 
1219   FREE(impl->arc_token_list);
1220   impl->arc_token_list = tmp_arc_token_list;
1221   }
1222   return ESR_SUCCESS;
1223 }
1224 
serializeArcTokenInfoV2(SR_SemanticGraphImpl * impl,PFile * fp)1225 static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
1226     PFile* fp)
1227 {
1228   int i;
1229   asr_uint32_t idx;
1230   arcID tmp[32];
1231 
1232   if (pfwrite(&impl->arc_token_list_len, 2, 1, fp) != 1)
1233     return ESR_WRITE_ERROR;
1234 
1235   idx = PTR_TO_IDX(impl->arc_token_freelist, impl->arc_token_list);
1236 
1237   if (pfwrite(&idx, 4, 1, fp) != 1)
1238     return ESR_WRITE_ERROR;
1239 
1240   idx = PTR_TO_IDX(impl->arc_token_insert_start, impl->arc_token_list);
1241 
1242   if (pfwrite(&idx, 4, 1, fp) != 1)
1243     return ESR_WRITE_ERROR;
1244 
1245   idx = 0;
1246   if (pfwrite(&idx, 4, 1, fp) != 1)
1247     return ESR_WRITE_ERROR;
1248 
1249   for (i = 0; i < impl->arc_token_list_len; ++i)
1250   {
1251     arc_token* token = &impl->arc_token_list[i];
1252     tmp[0] = token->ilabel;
1253     tmp[1] = token->olabel;
1254     tmp[2] = ARC_TOKEN_IDX(impl->arc_token_list, token->first_next_arc);
1255     tmp[3] = ARC_TOKEN_IDX(impl->arc_token_list, token->next_token_index);
1256     if (pfwrite(tmp, sizeof(tmp[0]), 4, fp) != 4)
1257       return ESR_WRITE_ERROR;
1258   }
1259 
1260   /* new, fixes load/save bug 2007 July 31
1261 	todo: change 4 to sizeof(asr_uint32) */
1262   if(1) {
1263 	asr_uint32_t idx[MAX_NUM_SLOTS];
1264 	for(i=0; i<MAX_NUM_SLOTS; i++)
1265 		idx[i] = PTR_TO_IDX(impl->arcs_for_slot[i], impl->arc_token_list);
1266 	if (pfwrite(&idx, 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS)
1267 			return ESR_WRITE_ERROR;
1268   }
1269 
1270   return ESR_SUCCESS;
1271 }
1272 
deserializeArcTokenInfoV2(SR_SemanticGraphImpl * impl,PFile * fp)1273 static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
1274     PFile* fp)
1275 {
1276   int i;
1277   asr_uint32_t idx;
1278   ESR_ReturnCode rc = ESR_SUCCESS;
1279   arcID tmp[32];
1280 
1281   if (pfread(&impl->arc_token_list_len, 2, 1, fp) != 1)
1282   {
1283     rc = ESR_READ_ERROR;
1284     PLogError(L("ESR_READ_ERROR: could not read arc_token_list_len"));
1285     return rc;
1286   }
1287 
1288   impl->arc_token_list = NEW_ARRAY(arc_token,
1289                                    impl->arc_token_list_len,
1290                                    L("semgraph.wordgraph"));
1291 
1292   if (impl->arc_token_list == NULL)
1293   {
1294     rc = ESR_OUT_OF_MEMORY;
1295     PLogError(ESR_rc2str(rc));
1296     return ESR_OUT_OF_MEMORY;
1297   }
1298 
1299   if (pfread(&idx, 4, 1, fp) != 1)
1300   {
1301     rc = ESR_READ_ERROR;
1302     PLogError(ESR_rc2str(rc));
1303     goto CLEANUP;
1304   }
1305 
1306   impl->arc_token_freelist = IDX_TO_PTR(idx, impl->arc_token_list);
1307 
1308   if (pfread(&idx, 4, 1, fp) != 1)
1309   {
1310     rc = ESR_READ_ERROR;
1311     PLogError(ESR_rc2str(rc));
1312     goto CLEANUP;
1313   }
1314 
1315   impl->arc_token_insert_start = IDX_TO_PTR(idx, impl->arc_token_list);
1316   // impl->arc_token_insert_start = impl->arc_token_list + impl->arc_token_list_len; // Rabih's fix
1317 
1318   if (pfread(&idx, 4, 1, fp) != 1)
1319   {
1320     rc = ESR_READ_ERROR;
1321     PLogError(ESR_rc2str(rc));
1322     goto CLEANUP;
1323   }
1324   impl->arc_token_insert_end = 0;
1325 
1326   for (i = 0; i < impl->arc_token_list_len; ++i)
1327   {
1328     arc_token* token = &impl->arc_token_list[i];
1329     if (pfread(tmp, sizeof(tmp[0]), 4, fp) != 4)
1330     {
1331       rc = ESR_READ_ERROR;
1332       goto CLEANUP;
1333     }
1334     token->ilabel = tmp[0];
1335     token->olabel = tmp[1];
1336     if (tmp[2] == MAXarcID)
1337       token->first_next_arc = ARC_TOKEN_NULL;
1338     else
1339       token->first_next_arc = ARC_TOKEN_LNK(impl->arc_token_list, tmp[2]);
1340     if (tmp[3] == MAXarcID)
1341       token->next_token_index = ARC_TOKEN_NULL;
1342     else
1343       token->next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, tmp[3]);
1344   }
1345 
1346   /* new, fixes load/save bug 2007 July 31
1347 	todo: change 4 to sizeof(asr_uint32) */
1348   if(1) {
1349 		asr_uint32_t idx[MAX_NUM_SLOTS];
1350 		if (pfread(&idx[0], 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS) {
1351 			rc = ESR_READ_ERROR;
1352 			PLogError(ESR_rc2str(rc));
1353 			goto CLEANUP;
1354 		}
1355 		for(i=0; i<MAX_NUM_SLOTS; i++)
1356 			impl->arcs_for_slot[i] = IDX_TO_PTR(idx[i], impl->arc_token_list);
1357    }
1358 
1359   return ESR_SUCCESS;
1360 
1361 CLEANUP:
1362   FREE(impl->arc_token_list);
1363   impl->arc_token_list =
1364     impl->arc_token_freelist =
1365       impl->arc_token_insert_start =
1366         impl->arc_token_insert_end = NULL;
1367   return rc;
1368 }
1369