1 /*---------------------------------------------------------------------------*
2 * SemanticGraphImpl.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20 #include "SR_SemprocPrefix.h"
21 #include "SR_SemprocDefinitions.h"
22 #include "SR_SemanticGraph.h"
23 #include "SR_SemanticGraphImpl.h"
24 #include "SR_SemanticProcessorImpl.h"
25 #include "ESR_ReturnCode.h"
26 #include "passert.h"
27 #include "pendian.h"
28 #include "plog.h"
29
30 static const char* MTAG = __FILE__;
31 #define AVG_SCRIPTS_PER_WORD 2.5
32 #define SLOTNAME_INDICATOR "__"
33 #define SLOTNAME_INDICATOR_LEN 2
34
/*
 * Conversions between an element pointer and its 32-bit index relative to
 * `base`.  A NULL pointer is encoded as the index 0xFFFFFFFFu and vice versa.
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. `list + 1`) expand with the intended precedence.
 */
#define PTR_TO_IDX(ptr, base) ((asr_uint32_t) ((ptr) == NULL ? 0xFFFFFFFFu : \
                               (asr_uint32_t)((ptr) - (base))))
#define IDX_TO_PTR(idx, base) ((idx) == 0xFFFFFFFFu ? NULL : (base) + (idx))
38
SR_SemanticGraphCreate(SR_SemanticGraph ** self)39 ESR_ReturnCode SR_SemanticGraphCreate(SR_SemanticGraph** self)
40 {
41 SR_SemanticGraphImpl* impl;
42
43 if (self == NULL)
44 {
45 PLogError(L("ESR_INVALID_ARGUMENT"));
46 return ESR_INVALID_ARGUMENT;
47 }
48 impl = NEW(SR_SemanticGraphImpl, MTAG);
49 if (impl == NULL)
50 {
51 PLogError(L("ESR_OUT_OF_MEMORY"));
52 return ESR_OUT_OF_MEMORY;
53 }
54 /* do not assume NEW initialize impl as zero, do it here */
55 memset(impl, 0, sizeof(SR_SemanticGraphImpl));
56
57 impl->Interface.destroy = &SR_SemanticGraph_Destroy;
58 impl->Interface.unload = &SR_SemanticGraph_Unload;
59 impl->Interface.load = &SR_SemanticGraph_Load;
60 impl->Interface.save = &SR_SemanticGraph_Save;
61 impl->Interface.addWordToSlot = &SR_SemanticGraph_AddWordToSlot;
62 impl->Interface.reset = &SR_SemanticGraph_Reset;
63 impl->script_olabel_offset = SEMGRAPH_SCRIPT_OFFSET;
64 impl->scopes_olabel_offset = SEMGRAPH_SCOPE_OFFSET;
65
66 *self = (SR_SemanticGraph*) impl;
67 return ESR_SUCCESS;
68 }
69
70
71 /**
72 * Default implementation.
73 */
SR_SemanticGraph_Destroy(SR_SemanticGraph * self)74 ESR_ReturnCode SR_SemanticGraph_Destroy(SR_SemanticGraph* self)
75 {
76 SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
77
78 if (self == NULL)
79 {
80 PLogError(L("ESR_INVALID_ARGUMENT"));
81 return ESR_INVALID_ARGUMENT;
82 }
83
84 FREE(impl);
85 return ESR_SUCCESS;
86 }
87
88 ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp);
89
90
91 /* private function */
SR_SemanticGraph_LoadFromImage(SR_SemanticGraph * self,wordmap * ilabels,const LCHAR * g2g)92 ESR_ReturnCode SR_SemanticGraph_LoadFromImage(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* g2g)
93 {
94 SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
95 PFile* fp = NULL;
96 struct
97 {
98 asr_uint32_t rec_context_image_size;
99 /* image data size of the recognition graph */
100 asr_uint32_t format;
101 }
102 header;
103 ESR_ReturnCode rc = ESR_SUCCESS;
104 ESR_BOOL isLittleEndian;
105 /*
106 #if __BYTE_ORDER==__LITTLE_ENDIAN
107 isLittleEndian = ESR_TRUE;
108 #else
109 isLittleEndian = ESR_FALSE;
110 #endif
111 */
112 isLittleEndian = ESR_TRUE;
113
114 fp = pfopen ( g2g, L("rb"));
115 /* CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp));
116 CHKLOG(rc, PFileOpen(fp, L("rb")));*/
117
118 if ( fp == NULL )
119 goto CLEANUP;
120
121 /* header */
122 if (pfread(&header, 4, 2, fp) != 2)
123 {
124 rc = ESR_READ_ERROR;
125 PLogError(ESR_rc2str(rc));
126 goto CLEANUP;
127 }
128
129 if (pfseek(fp, header.rec_context_image_size, SEEK_SET))
130 {
131 rc = ESR_READ_ERROR;
132 PLogError(L("ESR_READ_ERROR: could not seek to semgraph data"));
133 goto CLEANUP;
134 }
135
136 if (header.format == IMAGE_FORMAT_V2)
137 {
138 rc = sr_semanticgraph_loadV2(impl, ilabels, fp);
139 }
140 else
141 {
142 rc = ESR_INVALID_STATE;
143 PLogError("PCLG.txt P.txt inconsistency");
144 goto CLEANUP;
145 }
146
147 CLEANUP:
148 if (fp)
149 pfclose (fp);
150 if (rc != ESR_SUCCESS)
151 {
152 if (impl->arc_token_list != NULL)
153 {
154 FREE(impl->arc_token_list);
155 impl->arc_token_list = NULL;
156 }
157 }
158 return rc;
159 }
160
161 static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
162 PFile* fp);
163
164 static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
165 PFile* fp);
166
sr_semanticgraph_loadV2(SR_SemanticGraphImpl * impl,wordmap * ilabels,PFile * fp)167 ESR_ReturnCode sr_semanticgraph_loadV2(SR_SemanticGraphImpl* impl, wordmap* ilabels, PFile* fp)
168 {
169 unsigned int i, nfields;
170 ESR_ReturnCode rc = ESR_SUCCESS;
171 struct
172 {
173 asr_uint32_t format;
174 asr_uint32_t sgtype;
175 }
176 header;
177 asr_uint32_t tmp[32];
178
179 if (pfread(&header, 4/*sz*/, 2/*ni*/, fp) != 2)
180 {
181 rc = ESR_READ_ERROR;
182 PLogError(L("ESR_READ_ERROR: could not read V2"));
183 goto CLEANUP;
184 }
185
186 if (header.sgtype == GrammarTypeItemList)
187 {
188 /*
189 tmp = new unsigned short[num_words];
190 if( pfread( tmp, sizeof(tmp[0]), num_words, fp) != num_words) {
191 rc = ESR_READ_ERROR;
192 PLogMessage("can't read %d word script assocs\n", num_words);
193 goto CLEANUP;
194 }
195 */
196 /* convert these to an arc_token_list or whatever */
197 PLogError("not supported v2 itemlist type");
198 rc = ESR_INVALID_STATE;
199 goto CLEANUP;
200
201 }
202 else
203 {
204
205 nfields = 2;
206 if (pfread(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
207 {
208 rc = ESR_WRITE_ERROR;
209 PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset"));
210 goto CLEANUP;
211 }
212 i = 0;
213 impl->script_olabel_offset = (wordID)tmp[i++];
214 impl->scopes_olabel_offset = (wordID)tmp[i++];
215 ASSERT(i == nfields);
216
217 /* word arcs */
218 if ((rc = deserializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS)
219 {
220 PLogError(ESR_rc2str(rc));
221 goto CLEANUP;
222 }
223
224 /* use the ilabels provided externally (from recog graph ilabels) */
225 impl->ilabels = ilabels;
226
227 /* scopes */
228 if ((rc = deserializeWordMapV2(&impl->scopes_olabels, fp)) != ESR_SUCCESS)
229 {
230 PLogError(ESR_rc2str(rc));
231 goto CLEANUP;
232 }
233
234 /* scripts */
235 if ((rc = deserializeWordMapV2(&impl->scripts, fp)) != ESR_SUCCESS)
236 {
237 PLogError(ESR_rc2str(rc));
238 goto CLEANUP;
239 }
240 }
241 CLEANUP:
242 return rc;
243 }
244
245
get_first_arc_leaving_node1(arc_token * arc_token_list,arcID num_arcs,nodeID node)246 static arc_token_lnk get_first_arc_leaving_node1(arc_token* arc_token_list,
247 arcID num_arcs,
248 nodeID node)
249 {
250 arcID i;
251 for (i = 0; i < num_arcs; i++)
252 {
253 if ((nodeID)(int)arc_token_list[i].next_token_index == node)
254 return ARC_TOKEN_LNK(arc_token_list, i);
255 }
256 return ARC_TOKEN_NULL;
257 }
258
/**
 * Returns the storage size of word including its terminator, rounded up to
 * the next even number (same helper as in srec_context.c).
 */
static int strlen_with_null(const char* word)
{
  int padded = (int)(strlen(word) + 1); /* characters plus the terminator */

  /* odd sizes are bumped by one so the result is always even */
  return padded + (padded % 2 == 1);
}
265 /* private function */
SR_SemanticGraph_LoadFromTextFiles(SR_SemanticGraph * self,wordmap * ilabels,const LCHAR * basename,int num_words_to_add)266 ESR_ReturnCode SR_SemanticGraph_LoadFromTextFiles(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add)
267 {
268 ESR_ReturnCode rc = ESR_FATAL_ERROR;
269 arcID num_scripts;
270 int isConstString = 0;
271 LCHAR filename[MAX_STRING_LEN];
272 LCHAR line[MAX_SCRIPT_LEN];
273 LCHAR iword[MAX_STRING_LEN];
274 LCHAR oword[MAX_SCRIPT_LEN];
275 LCHAR *p;
276 unsigned int max_num_arc_tokens;
277 nodeID from_node, into_node;
278 wordID ilabel = 0;
279 labelID olabel = 0;
280 arc_token *atoken;
281 arc_token *last_atoken;
282 costdata cost = 0;
283 arcID num_arcs;
284 arc_token* arc_token_list;
285 long fpos;
286 PFile* p_text_file = NULL;
287 PFile* scripts_file;
288 SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self;
289 size_t lineNo;
290 unsigned int i;
291 wordID num_scope_words;
292 asr_int32_t num_scope_chars;
293 LCHAR* _tMp; /* used by IS_SCOPE_MARKER() below */
294
295 /* use the ilables that are provided externally (from recog graph ilabels) */
296 semgraph->ilabels = ilabels;
297
298
299
300 /* try to open the .script file */
301 LSTRCPY(filename, basename);
302 LSTRCAT(filename, ".script");
303 scripts_file = pfopen ( filename, L("r") );
304 /* CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &scripts_file));
305 CHKLOG(rc, PFileOpen(scripts_file, L("r")));*/
306
307 if ( scripts_file == NULL )
308 {
309 rc = ESR_OPEN_ERROR;
310 goto CLEANUP;
311 }
312
313 /* Load the scripts file
314 assumptions:
315
316 - the scripts file has each line ordered starting from 0 as such
317 <integer><space><script>
318
319 - the integer MUST become the index of the script in the wordmap
320
321 - output labels referenced in the semgraph are the integers (wordmap index) prepending with '_'
322
323 - output labels stored in the semgraph are actually integers which are equal to
324 script_olabel_offset + <integer>
325 */
326
327 /* determine number of words/chars to allocate */
328 fpos = pftell(scripts_file);
329 for (i = num_scripts = 0; pfgets(line, MAX_SCRIPT_LEN, scripts_file); num_scripts++)
330 {
331 size_t len = LSTRLEN(line) + 1;
332 if (len % 2) len++;
333 i = i + len; /* count the chars */
334 }
335 pfseek(scripts_file, fpos, SEEK_SET);
336
337 /* on each line I will have 1 big word */
338 /* figure that each script for dynamically added words will be a simple assignment
339 like myVar='someVal' ... which looks like almost 2.5 words, hence *2.5 */
340 wordmap_create(&semgraph->scripts, i, num_scripts, (int)AVG_SCRIPTS_PER_WORD*num_words_to_add);
341
342 /* load up all the information */
343 lineNo = 0;
344 while (pfgets(line, MAX_SCRIPT_LEN, scripts_file))
345 {
346 ASSERT( sizeof( iword[0]) == sizeof(char)); // else more code to write!
347 if (sscanf(line, "%s ", iword) == 1)
348 {
349 LSTRCPY(oword, line + LSTRLEN(iword) + 1);
350 /* may actually have spaces in it and this is messing me up ... here is the fix */
351 /* copy the line starting after the iword */
352 for (i = 0, p = line + LSTRLEN(iword) + 1; *p; p++)
353 {
354 if (*p == '\\')
355 {
356 if (isConstString)
357 oword[i++] = *p;
358 ++p;
359 }
360 else if (*p == '\'')
361 isConstString = (isConstString ? 0 : 1) ; /* toggle */
362 if (isConstString || !isspace(*p))
363 oword[i++] = *p;
364 }
365 oword[i] = '\0';
366
367 /* make sure that the index in the wordmap matches the line number */
368 if (wordmap_add_word(semgraph->scripts, oword) != lineNo)
369 {
370 PLogError(L("ESR_READ_ERROR: internal error adding script (%d)"), num_words_to_add);
371 return ESR_NO_MATCH_ERROR;
372 }
373 lineNo++;
374 }
375 else
376 {
377 PLogMessage(L("can't parse line %s"), line);
378 passert(0);
379 }
380 }
381 pfclose (scripts_file);
382
383 /* try to open the P.txt file */
384 LSTRCPY(filename, basename);
385 LSTRCAT(filename, ".P.txt");
386 p_text_file = pfopen ( filename, L("r"));
387 /* CHKLOG(rc, PFileSystemCreatePFile(filename, TRUE, &p_text_file));
388 CHKLOG(rc, PFileOpen(p_text_file, L("r")));*/
389
390 if ( p_text_file == NULL )
391 goto CLEANUP;
392
393 /* determine number of word arcs to allocate */
394 fpos = pftell(p_text_file);
395 num_scope_words = 0;
396 num_scope_chars = 0;
397 for (num_arcs = 0; pfgets(line, MAX_STRING_LEN, p_text_file); ++num_arcs)
398 {
399 if (num_arcs == MAXarcID)
400 break; /* error */
401 if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4)
402 {
403 if (IS_SCOPE_MARKER(oword)) {
404 num_scope_words++;
405 num_scope_chars += strlen_with_null( oword);
406 if(num_scope_chars) num_scope_chars++ ;
407 }
408 }
409 }
410 max_num_arc_tokens = num_arcs + (arcID)num_words_to_add;
411 MEMCHK(rc, max_num_arc_tokens, MAXarcID);
412 pfseek(p_text_file, fpos, SEEK_SET);
413
414 semgraph->arc_token_list = NEW_ARRAY(arc_token,max_num_arc_tokens, L("semgraph.wordgraph"));
415 arc_token_list = semgraph->arc_token_list;
416 /* need to initialize my wordmap */
417 wordmap_create(&semgraph->scopes_olabels, num_scope_chars, num_scope_words,0); // max_num_arc_tokens);
418
419 /* 1. first load up all the information */
420 i = 0;
421 while (pfgets(line, MAX_STRING_LEN, p_text_file))
422 {
423 if (sscanf(line, "%hu\t%hu\t%[^\t]\t%[^\t\n\r]", &from_node, &into_node, iword, oword) == 4)
424 {
425 /* the cost is 0 by default */
426 cost = 0;
427 /* since I am reading strings, and I want to store integers, I need to get
428 the index of the string by looking up in the ilabels wordmap */
429 ilabel = wordmap_find_index(ilabels, iword);
430
431 /* now for the olabels, depending on the type of the label, I either use the index directly
432 or save the index in a wordmap which will eventually give me the right index.
433 Remember that the index must be offset by a certain value depending on which wordmap I'm using */
434
435 if (IS_SCRIPT_MARKER(oword)) /* olabel type: script */
436 {
437 olabel = (labelID) atoi(&oword[1]);
438 olabel = (wordID)(olabel + semgraph->script_olabel_offset); /* the offset */
439 }
440 else if (IS_SCOPE_MARKER(oword)) /* olabel type: scope marker */
441 {
442 /* check if the label is already in the wordmap, and reuse index */
443 olabel = wordmap_find_index(semgraph->scopes_olabels, oword);
444
445 if (olabel == MAXwordID) /* not found so add to wordmap and get new index */
446 olabel = wordmap_add_word(semgraph->scopes_olabels, oword);
447 olabel = (wordID)(olabel + semgraph->scopes_olabel_offset); /* the offset */
448 }
449 else /* olabel type: input symbols hopefully !!! */
450 {
451 /* if oword does not have a \t in the end, add a \t*/
452
453 /* check if the label is already in the wordmap, and reuse index */
454 olabel = wordmap_find_index(ilabels, oword);
455
456 if (olabel == MAXwordID) /* not found so add to wordmap and get new index */
457 PLogMessage(L("output label not found: %s"), oword);
458 }
459
460 }
461 else if (sscanf(line, "%hu", &from_node) == 1)
462 {
463 into_node = MAXnodeID;
464 ilabel = MAXwordID;
465 olabel = MAXwordID;
466 cost = 0;
467 }
468 else
469 {
470 PLogMessage(L("can't parse line %s"), line);
471 passert(0);
472 }
473
474 /* okay, now that I have the data for the current arc, save it to the arc_token data structure*/
475 atoken = &arc_token_list[i];
476 ++i;
477
478 atoken->ilabel = ilabel;
479 atoken->olabel = olabel;
480 /* atoken->cost = cost; not used for now */
481
482 /* initially this stores INTEGERS !!! , I need to cross-reference the integers with the
483 appropriate arc_token pointers (in the next steps for the algorithm) */
484 atoken->first_next_arc = (arc_token_lnk)into_node;
485 atoken->next_token_index = (arc_token_lnk)from_node;
486 }
487 num_arcs = (arcID) i;
488
489 pfclose(p_text_file);
490 p_text_file = NULL;
491
492 wordmap_setbase(semgraph->scopes_olabels);
493 wordmap_ceiling(semgraph->scopes_olabels); /* we won't be adding scopes! */
494 wordmap_setbase(semgraph->scripts);
495
496 /* 2. now do the internal cross references */
497 /* in this pass we build the 1-to-1 links, and n-to-1 links in a graph */
498 /* in other words... first_next_arc points to the first arc leaving the node */
499 for (i = 0; i < num_arcs; ++i)
500 {
501 atoken = &arc_token_list[i];
502 into_node = (nodeID)(int)atoken->first_next_arc; /* get the integer */
503 atoken->first_next_arc = /* converts the integer id to a arc_token pointer */
504 get_first_arc_leaving_node1(arc_token_list, num_arcs, (nodeID)(int)atoken->first_next_arc);
505 }
506
507 /* 3. now do more internal cross refs */
508 /* in this pass we build the 1-to-n links */
509 /* in other words ... setup the linked list of all arc leaving from the same node */
510 last_atoken = &arc_token_list[0];
511 for (i = 1; i < num_arcs; ++i)
512 {
513 atoken = &arc_token_list[i];
514 /* if this arc and the last one do NOT leave the same node (i.e. from_node, see above),
515 then the next_token_index is not used */
516 if (atoken->next_token_index != last_atoken->next_token_index)
517 last_atoken->next_token_index = ARC_TOKEN_NULL;
518 else
519 last_atoken->next_token_index = ARC_TOKEN_LNK(arc_token_list, i);
520 last_atoken = atoken;
521 }
522 last_atoken->next_token_index = ARC_TOKEN_NULL;
523
524 #if DEBUG_ASTAR
525 /* under debug, it's nice to be able to see the words leaving the
526 destination node, they are stored sequentially in the debug ary */
527 for (i = 0; i < num_arcs; i++)
528 {
529 LCHAR * p;
530 arc_token* tmp;
531 atoken = &arc_token_list[i];
532 atoken->debug[0] = 0;
533 tmp = ARC_TOKEN_PTR(arc_token_list, atoken->first_next_arc);
534 for (; tmp; tmp = ARC_TOKEN_PTR(arc_token_list, tmp->next_token_index))
535 {
536 if (tmp->first_next_arc == ARC_TOKEN_NULL)
537 p = "END";
538 else if (!tmp->label)
539 p = "NULL";
540 else
541 p = tmp->label;
542 if (strlen(atoken->debug) + strlen(p) + 6 < 64)
543 {
544 strcat(atoken->debug, p);
545 strcat(atoken->debug, " ");
546 }
547 else
548 {
549 strcat(atoken->debug, "...");
550 break;
551 }
552 }
553 }
554 #endif
555 semgraph->arc_token_list_len = (arcID)max_num_arc_tokens;
556 /* initialize the freelist */
557 if (num_arcs < max_num_arc_tokens)
558 {
559 semgraph->arc_token_freelist = &semgraph->arc_token_list[num_arcs];
560 for (i = num_arcs; i < max_num_arc_tokens - 1; i++)
561 {
562 semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
563 semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(semgraph->arc_token_list, (i + 1));
564 }
565 semgraph->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
566 semgraph->arc_token_list[i].next_token_index = ARC_TOKEN_NULL;
567 }
568 else
569 semgraph->arc_token_freelist = NULL;
570
571 /* for dynamic addition */
572 for (i = 0; i < MAX_NUM_SLOTS; i++)
573 semgraph->arcs_for_slot[i] = NULL;
574
575 semgraph->arc_token_insert_start = semgraph->arc_token_list + num_arcs;
576 semgraph->arc_token_insert_end = NULL;
577 return ESR_SUCCESS;
578 CLEANUP:
579 if (p_text_file)
580 pfclose (p_text_file);
581 return rc;
582 }
583
SR_SemanticGraph_Load(SR_SemanticGraph * self,wordmap * ilabels,const LCHAR * basename,int num_words_to_add)584 ESR_ReturnCode SR_SemanticGraph_Load(SR_SemanticGraph* self, wordmap* ilabels, const LCHAR* basename, int num_words_to_add)
585 {
586 ESR_ReturnCode rc;
587
588 if (LSTRSTR(basename, L(".g2g")))
589 {
590 rc = SR_SemanticGraph_LoadFromImage(self, ilabels, basename);
591 }
592 else
593 {
594 rc = SR_SemanticGraph_LoadFromTextFiles(self, ilabels, basename, num_words_to_add);
595 }
596 return rc;
597 }
598
599 /**
600 * Unload Sem graph
601 */
SR_SemanticGraph_Unload(SR_SemanticGraph * self)602 ESR_ReturnCode SR_SemanticGraph_Unload(SR_SemanticGraph* self)
603 {
604 SR_SemanticGraphImpl* semgraph = (SR_SemanticGraphImpl*) self;
605
606 /* see the wordmap_create in the Load function */
607 wordmap_destroy(&semgraph->scopes_olabels);
608 wordmap_destroy(&semgraph->scripts);
609
610 FREE(semgraph->arc_token_list);
611 semgraph->arc_token_list = 0;
612 return ESR_SUCCESS;
613 }
614
615 ESR_ReturnCode sr_semanticgraph_saveV1(SR_SemanticGraphImpl* impl, const LCHAR* g2g);
616 ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g);
617
SR_SemanticGraph_Save(SR_SemanticGraph * self,const LCHAR * g2g,int version_number)618 ESR_ReturnCode SR_SemanticGraph_Save(SR_SemanticGraph* self, const LCHAR* g2g, int version_number)
619 {
620 SR_SemanticGraphImpl* impl = (SR_SemanticGraphImpl*) self;
621 ESR_ReturnCode rc = ESR_SUCCESS;
622
623 if (version_number == 2)
624 {
625 rc = sr_semanticgraph_saveV2(impl, g2g);
626 }
627 else
628 {
629 PLogError("invalid version_number %d\n", version_number);
630 rc = ESR_INVALID_ARGUMENT;
631 }
632 return rc;
633 }
634
635
/**
 * Classifies the graph as GrammarTypeItemList or GrammarTypeBNF.
 *
 * An item list has this shape (as P.txt lines):
 *
 *     0 1 eps        {
 *     1 2 13e_avenue myRoot}
 *     ...
 *     1 2 13e_avenue myRoot}
 *     2
 *
 * i.e. an epsilon arc, followed by sibling arcs that all lead to the same
 * successor and whose ilabels count up from NUM_ITEMLIST_HDRWDS to the number
 * of words in impl->ilabels, followed by a chain of single arcs that are
 * either epsilon arcs or carry MAXwordID in both labels.  Anything else is
 * reported as GrammarTypeBNF.
 */
int sr_semanticgraph_get_type(SR_SemanticGraphImpl* impl)
{
  arc_token* base = impl->arc_token_list;
  arc_token* cur = base;
  arc_token_lnk merge_link;
  int next_ilabel;

  /* the graph must start with an epsilon arc that has a successor */
  if (cur->ilabel != WORD_EPSILON_LABEL)
    return GrammarTypeBNF;
  cur = ARC_TOKEN_PTR(base, cur->first_next_arc);
  if (cur == NULL)
    return GrammarTypeBNF;

  /* all sibling arcs must merge at one place and be numbered consecutively */
  merge_link = cur->first_next_arc;
  next_ilabel = NUM_ITEMLIST_HDRWDS;
  while (cur != NULL)
  {
    if (cur->first_next_arc != merge_link)
      return GrammarTypeBNF;
    if (cur->ilabel != next_ilabel)
      return GrammarTypeBNF;
    next_ilabel++;
    cur = ARC_TOKEN_PTR(base, cur->next_token_index);
  }
  if (next_ilabel != impl->ilabels->num_words)
    return GrammarTypeBNF;

  /* what follows the merge point must be a plain chain without siblings */
  cur = ARC_TOKEN_PTR(base, merge_link);
  while (cur != NULL)
  {
    if (cur->next_token_index != ARC_TOKEN_NULL)
      return GrammarTypeBNF;
    if (cur->ilabel != WORD_EPSILON_LABEL &&
        (cur->ilabel != MAXwordID || cur->olabel != MAXwordID))
      return GrammarTypeBNF;
    cur = ARC_TOKEN_PTR(base, cur->first_next_arc);
  }
  return GrammarTypeItemList;
}
676
677 #define SEMGR_OUTPUT_FORMAT_V2 478932784
678
sr_semanticgraph_saveV2(SR_SemanticGraphImpl * impl,const LCHAR * g2g)679 ESR_ReturnCode sr_semanticgraph_saveV2(SR_SemanticGraphImpl* impl, const LCHAR* g2g)
680 {
681 ESR_ReturnCode rc;
682 PFile* fp;
683 asr_uint32_t tmp[32];
684 struct
685 {
686 asr_uint32_t format;
687 asr_uint32_t sgtype;
688 }
689 header;
690 unsigned int i, nfields;
691
692 fp = pfopen ( g2g, L("r+b"));
693 /* CHKLOG(rc, PFileSystemCreatePFile(g2g, isLittleEndian, &fp));
694 CHKLOG(rc, PFileOpen(fp, L("r+b")));*/
695
696 if ( fp == NULL )
697 {
698 rc = ESR_OPEN_ERROR;
699 goto CLEANUP;
700 }
701
702 pfseek(fp, 0, SEEK_END);
703
704 header.format = IMAGE_FORMAT_V2;
705 header.sgtype = sr_semanticgraph_get_type(impl);
706 header.sgtype = GrammarTypeBNF;
707
708 #ifdef SREC_ENGINE_VERBOSE_LOGGING
709 PLogMessage("sr_semanticgraph_saveV2() semgraphtype %d", header.sgtype);
710 #endif
711 if (pfwrite(&header, 4 /*sz*/, 2/*ni*/, fp) != 2)
712 {
713 rc = ESR_WRITE_ERROR;
714 PLogError(L("ESR_WRITE_ERROR: could not write V2"));
715 goto CLEANUP;
716 }
717
718 if (header.sgtype == GrammarTypeItemList)
719 {
720 arc_token *parser, *atok;
721
722 /* write num_words size array of short script ids
723 this might be just a y=x array, but it could be there
724 are synonyms, eg. NEW_YORK NEW_YORK_CITY -> same script
725 */
726 parser = impl->arc_token_list;
727 parser = ARC_TOKEN_PTR(impl->arc_token_list, parser->first_next_arc);
728 for (i = NUM_ITEMLIST_HDRWDS; i < impl->ilabels->num_words; i++)
729 {
730 for (atok = parser; atok; atok = ARC_TOKEN_PTR(impl->arc_token_list, atok->next_token_index))
731 {
732 if (atok->ilabel == i) break;
733 }
734 if (!atok)
735 {
736 rc = ESR_INVALID_STATE;
737 PLogError("Can't find word %d in semgraph\n", i);
738 goto CLEANUP;
739 }
740 tmp[0] = atok->olabel;
741 if (pfwrite(tmp, sizeof(tmp[0]), 1, fp) != 1)
742 {
743 rc = ESR_WRITE_ERROR;
744 PLogError(L("ESR_WRITE_ERROR: could not write V2"));
745 goto CLEANUP;
746 }
747 }
748 if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS)
749 {
750 PLogError(ESR_rc2str(rc));
751 goto CLEANUP;
752 }
753 }
754 else
755 {
756
757 i = 0;
758 tmp[i++] = impl->script_olabel_offset;
759 tmp[i++] = impl->scopes_olabel_offset;
760 nfields = i;
761
762 if (pfwrite(tmp, sizeof(tmp[0]), nfields, fp) != nfields)
763 {
764 rc = ESR_WRITE_ERROR;
765 PLogError(L("ESR_WRITE_ERROR: could not write script_olabel_offset"));
766 goto CLEANUP;
767 }
768
769 /* word arcs */
770 if ((rc = serializeArcTokenInfoV2(impl, fp)) != ESR_SUCCESS)
771 {
772 PLogError(ESR_rc2str(rc));
773 goto CLEANUP;
774 }
775
776 /* do not WRITE ilabels... this is a ref to the olabels from rec context */
777
778 /* scopes */
779 if ((rc = serializeWordMapV2(impl->scopes_olabels, fp)) != ESR_SUCCESS)
780 {
781 PLogError(ESR_rc2str(rc));
782 goto CLEANUP;
783 }
784
785 if ((rc = serializeWordMapV2(impl->scripts, fp)) != ESR_SUCCESS)
786 {
787 PLogError(ESR_rc2str(rc));
788 goto CLEANUP;
789 }
790 #ifdef SREC_ENGINE_VERBOSE_LOGGING
791 PLogMessage("G2G done WR semg %d", pftell(fp));
792 #endif
793 }
794 rc = ESR_SUCCESS;
795 CLEANUP:
796 if (fp)
797 pfclose (fp);
798 return rc;
799 }
800
/**
 * Walks the sibling chain (next_token_index links, resolved against base)
 * that starts at arc_token_list and returns the first token whose ilabel
 * equals ilabel, or NULL when the chain ends without a match.
 */
arc_token* arc_tokens_find_ilabel(arc_token* base, arc_token* arc_token_list, wordID ilabel)
{
  arc_token* cursor = arc_token_list;

  while (cursor != NULL && cursor->ilabel != ilabel)
    cursor = ARC_TOKEN_PTR(base, cursor->next_token_index);

  return cursor;
}
808
/**
 * Takes the first token off the free list.
 *
 * The free list head is advanced to the token's successor and the returned
 * token has its labels zeroed and both links set to ARC_TOKEN_NULL.
 *
 * @return the detached token, or NULL when the free list is empty
 */
arc_token* arc_tokens_get_free(arc_token* base, arc_token** arc_token_freelist)
{
  arc_token* taken = *arc_token_freelist;

  if (taken != NULL)
  {
    *arc_token_freelist = ARC_TOKEN_PTR(base, taken->next_token_index);
    taken->olabel = 0;
    taken->ilabel = 0;
    taken->next_token_index = ARC_TOKEN_NULL;
    taken->first_next_arc = ARC_TOKEN_NULL;
  }
  return taken;
}
820
/**
 * Counts the tokens on the chain that starts at head and is linked through
 * next_token_index (resolved against base).  Returns 0 for a NULL head.
 */
int arc_tokens_list_size(arc_token* base, arc_token* head)
{
  int length = 0;
  arc_token* cursor = head;

  while (cursor != NULL)
  {
    ++length;
    cursor = ARC_TOKEN_PTR(base, cursor->next_token_index);
  }
  return length;
}
831
/**
 * Returns a whole chain of tokens to the free list.
 *
 * The last token of the chain starting at head is linked to the current
 * free list head, and head becomes the new free list head.  A NULL head is
 * ignored.
 */
void arc_tokens_free_list(arc_token* base, arc_token** arc_token_freelist, arc_token* head)
{
  arc_token* last;
  arc_token* following;

  if (head == NULL)
    return;

  /* advance to the final token of the chain */
  last = head;
  while ((following = ARC_TOKEN_PTR(base, last->next_token_index)) != NULL)
    last = following;

  /* splice the chain in front of the free list */
  last->next_token_index = ARC_TOKEN_PTR2LNK(base, (*arc_token_freelist));
  *arc_token_freelist = head;
}
845
find_in_union_of_scripts(const LCHAR * union_script,const LCHAR * script,ESR_BOOL * isFound)846 ESR_ReturnCode find_in_union_of_scripts(const LCHAR* union_script, const LCHAR* script, ESR_BOOL* isFound)
847 {
848 const LCHAR* start;
849 const LCHAR* end;
850 const LCHAR* p;
851 const LCHAR* q;
852
853 if (union_script == NULL || script == NULL)
854 return ESR_INVALID_ARGUMENT;
855
856 start = LSTRCHR(union_script, L('\''));
857 if (start == NULL)
858 return ESR_INVALID_ARGUMENT;
859
860 start++; /* point to first char after \' */
861
862 end = LSTRCHR(start, L('\'')); /* point to last \' */
863 if (end == NULL)
864 return ESR_INVALID_ARGUMENT;
865
866 p = start;
867
868 start = LSTRCHR(script, L('\''));
869 if (start == NULL)
870 return ESR_INVALID_ARGUMENT;
871 start++; /* point to first char after \' */
872
873 q = start;
874
875 while (p < end)
876 {
877 if (*p == MULTIPLE_MEANING_JOIN_CHAR) /* if at the end of a meaning (not end of union)
878 and p matched q all the way up to join char then found! */
879 {
880 *isFound = ESR_TRUE;
881 return ESR_SUCCESS;
882 }
883 else if (*p == *q) /* while same keep going */
884 {
885 if (*p == *(end - 1)) /* if at the end and p matched q all the way then found! */
886 {
887 *isFound = ESR_TRUE;
888 return ESR_SUCCESS;
889 }
890 q++;
891 }
892 else /* skip to next meaning after join char */
893 {
894 while (*p != MULTIPLE_MEANING_JOIN_CHAR && p < end)
895 p++;
896 /* reset q */
897 q = start;
898 }
899 p++;
900 }
901
902 *isFound = ESR_FALSE;
903 return ESR_SUCCESS;
904 }
905
906 #define QUOTE_CHAR L('\'')
count_num_literals(const LCHAR * a,const LCHAR ** start_points,int max_num_start_points)907 int count_num_literals(const LCHAR* a, const LCHAR** start_points, int max_num_start_points)
908 {
909 int num = 0;
910 const LCHAR *p, *q = a;
911 const LCHAR *end = a + LSTRLEN(a);
912 while (1)
913 {
914 /* look for starting QUOTE_CHAR */
915 for (p = q; p < end; p++)
916 {
917 if (*p == ESC_CHAR) p++;
918 else if (*p == QUOTE_CHAR) break;
919 }
920 if (p == end) break;
921 if (num > max_num_start_points) break; /* just abort the counting! */
922 start_points[num] = p;
923 /* look for ending QUOTE_CHAR */
924 for (q = p + 1; q < end; q++)
925 {
926 if (*q == ESC_CHAR) q++;
927 else if (*q == QUOTE_CHAR) break;
928 }
929 if (q == end) /* does not close! */
930 return -1;
931 p = ++q;
932 num++;
933 }
934 return num;
935 }
/**
 * Writes to o the literal  '<contents of a>JOIN<contents of b>'  followed by
 * a terminator, where JOIN is MULTIPLE_MEANING_JOIN_CHAR.
 *
 * a and b must each point at the opening quote of a literal; the contents
 * run up to the next QUOTE_CHAR that is not preceded by ESC_CHAR.
 *
 * @param o     output buffer
 * @param a     opening quote of the first literal
 * @param b     opening quote of the second literal
 * @param pptra [out] set to the character after the closing quote of a
 * @return always 0
 */
int union_literal_pair(LCHAR* o, LCHAR* a, LCHAR* b, LCHAR** pptra)
{
  LCHAR* a_end = a + LSTRLEN(a);
  LCHAR* b_end = b + LSTRLEN(b);
  LCHAR* a_close = a + 1;
  LCHAR* b_close = b + 1;
  LCHAR* src;
  LCHAR* out = o;

  /* locate the closing quote of a */
  while (a_close < a_end)
  {
    if (*a_close == ESC_CHAR)
      a_close++; /* jump over the escaped character */
    else if (*a_close == QUOTE_CHAR)
      break;
    a_close++;
  }

  /* locate the closing quote of b */
  while (b_close < b_end)
  {
    if (*b_close == ESC_CHAR)
      b_close++; /* jump over the escaped character */
    else if (*b_close == QUOTE_CHAR)
      break;
    b_close++;
  }

  /* now make the output */
  *out++ = QUOTE_CHAR;
  src = a + 1;
  while (src < a_close)
    *out++ = *src++;
  *out++ = MULTIPLE_MEANING_JOIN_CHAR;
  src = b + 1;
  while (src < b_close)
    *out++ = *src++;
  *out++ = QUOTE_CHAR;
  *out++ = 0;

  *pptra = a_close + 1;
  return 0;
}
965
966 /* now handles n1='52';n2='62'; UNION n1='53';nx='63'; */
967
make_union_of_scripts(LCHAR * union_script,const size_t max_len,const LCHAR * a,const LCHAR * b)968 ESR_ReturnCode make_union_of_scripts(LCHAR* union_script, const size_t max_len, const LCHAR* a, const LCHAR* b)
969 {
970 int i, num_literals_in_a, num_literals_in_b;
971 LCHAR *spa[8], *spb[8], *spo[8], *ptra;
972
973 if (a == NULL || b == NULL)
974 return ESR_INVALID_ARGUMENT;
975
976 num_literals_in_a = count_num_literals(a, (const LCHAR **)spa, 8);
977 num_literals_in_b = count_num_literals(b, (const LCHAR **)spb, 8);
978
979 if (num_literals_in_a == 0 && num_literals_in_b == 0)
980 {
981 if (LSTRLEN(a) > max_len) return ESR_BUFFER_OVERFLOW;
982 else
983 {
984 LSTRCPY(union_script, a);
985 return ESR_SUCCESS;
986 }
987 }
988 else if (num_literals_in_a != num_literals_in_b)
989 {
990 return ESR_INVALID_ARGUMENT;
991 }
992
993 /* V='Springfield_IL' union V='Springfield_MA' is V='Springfield_IL#Springfield_MA' */
994 /* 18 + 18 -2 = 33 + 1 for NULL */
995 if ((LSTRLEN(a) + LSTRLEN(b) - 2) > max_len)
996 {
997 PLogError("Temp buffer (size %d) to hold union of multiple meanings (size %d) is too small", max_len, (LSTRLEN(a) + LSTRLEN(b) - 2));
998 return ESR_BUFFER_OVERFLOW;
999 }
1000
1001 LSTRCPY(union_script, a);
1002 for (i = 0; i < num_literals_in_a; i++)
1003 {
1004 count_num_literals(union_script, (const LCHAR **)spo, 8);
1005 /* here union_script is n0='52';n1='62'; */
1006 union_literal_pair(spo[i], spa[i], spb[i], &ptra);
1007 #ifdef _WIN32
1008 if (LSTRLEN(spo[i]) > MAX_SEMPROC_VALUE)
1009 pfprintf(PSTDOUT, "Warning: won't be able to parse this script! len %d>%d %s\n", LSTRLEN(spo[i]), MAX_SEMPROC_VALUE, spo[i]);
1010 #endif
1011 /* here union_script is n0='52#53' */
1012 LSTRCAT(union_script, ptra);
1013 /* here union_script is n0='52#53';n1='62'; */
1014 }
1015 return ESR_SUCCESS;
1016 }
1017
1018 /**
1019 * Default implementation.
1020 */
SR_SemanticGraph_AddWordToSlot(SR_SemanticGraph * self,const LCHAR * _slot,const LCHAR * word,const LCHAR * script,const ESR_BOOL newWordAddedToFST)1021 ESR_ReturnCode SR_SemanticGraph_AddWordToSlot(SR_SemanticGraph* self, const LCHAR* _slot, const LCHAR* word, const LCHAR* script, const ESR_BOOL newWordAddedToFST)
1022 {
1023 struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self;
1024 arc_token *token, *tmp;
1025 arc_token *tmp_arc_token_list;
1026 wordID wdID, scriptID, old_scriptID;
1027 wordID slotID;
1028 LCHAR union_script[MAX_STRING_LEN]; /* sizeof used elsewhere */
1029 ESR_ReturnCode rc; int i;
1030 int tmp_arc_token_list_len;
1031 int offset;
1032 #define MAX_WORD_LEN 128
1033 char veslot[MAX_WORD_LEN];
1034
1035 if (script == NULL || *script == L('\0') || !LSTRCMP(script, L("NULL")))
1036 return ESR_SUCCESS; /* no script to add so keep going */
1037
1038 /* find out if the word I am adding already exists. If it already exists, then that means that I
1039 potentially am adding an alternate meaning for the word */
1040 /* the slotname in .PCLG.txt and .map files use __ as the indicator. Xufang */
1041 if(_slot[0] == '@') {
1042 strcpy(veslot,SLOTNAME_INDICATOR);
1043 strcat(veslot,_slot+1);
1044 strcat(veslot,SLOTNAME_INDICATOR);
1045 } else
1046 strcpy(veslot, _slot);
1047
1048 slotID = wordmap_find_rule_index(impl->ilabels, veslot);
1049 if (slotID == MAXwordID)
1050 {
1051 PLogError(L("ESR_NO_MATCH_ERROR: Could not find slotID in wordmap %s"), _slot);
1052 return ESR_NO_MATCH_ERROR;
1053 }
1054 wdID = wordmap_find_index_in_rule(impl->ilabels, word, slotID);
1055 if (wdID == MAXwordID)
1056 {
1057 PLogError(L("ESR_NO_MATCH_ERROR: Could not find wordID/slotID in wordmap %s/%d"), word, slotID);
1058 return ESR_NO_MATCH_ERROR;
1059 }
1060
1061 /* **this is an optimization step** */
1062 /* Is word already added in this slot? if so, get the token pointer, else, token is NULL
1063 *
1064 * the assumption is that FST_AddWordToGrammar will tell us if this word was newly added in the FST, or
1065 * if the word was added at least 1 iteration ago, meaning that I have already added it to my
1066 * semgraph slot at some earlier point
1067 */
1068 if (newWordAddedToFST)
1069 token = NULL;
1070 else
1071 token = arc_tokens_find_ilabel(impl->arc_token_list, impl->arcs_for_slot[slotID], wdID);
1072
1073 #define FST_GROW_FACTOR 12/10
1074 #define FST_GROWARCS_MIN 100
1075 if (token == NULL) /* new word to add to slot */
1076 {
1077 /* add the script if new */
1078 scriptID = wordmap_find_index(impl->scripts, script);
1079 if (scriptID == MAXwordID)
1080 scriptID = wordmap_add_word(impl->scripts, script);
1081 if (scriptID == MAXwordID)
1082 {
1083 PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap"));
1084 return ESR_OUT_OF_MEMORY;
1085 }
1086
1087 token = impl->arcs_for_slot[slotID];
1088 tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist));
1089 if (tmp == NULL)
1090 {
1091 #if defined (FST_GROW_FACTOR)
1092 tmp_arc_token_list_len = impl->arc_token_list_len * FST_GROW_FACTOR;
1093 if(tmp_arc_token_list_len - impl->arc_token_list_len <=FST_GROWARCS_MIN)
1094 tmp_arc_token_list_len+=FST_GROWARCS_MIN;
1095
1096 tmp_arc_token_list= NEW_ARRAY(arc_token,tmp_arc_token_list_len, L("semgraph.wordgraph"));
1097 if(!tmp_arc_token_list) {
1098 PLogError(L("ESR_OUT_OF_MEMORY: Could not extend allocation of semgraph.wordgraph"));
1099 return ESR_OUT_OF_MEMORY;
1100 }
1101 memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token));
1102
1103 for(i=0; i<MAX_NUM_SLOTS;i++)
1104 {
1105 if(impl->arcs_for_slot[i] != NULL) {
1106 offset = impl->arcs_for_slot[i] - impl->arc_token_list;
1107 impl->arcs_for_slot[i] = tmp_arc_token_list + offset;
1108 }
1109 }
1110 token = impl->arcs_for_slot[slotID];
1111
1112 ASSERT( impl->arc_token_freelist == NULL);
1113
1114 impl->arc_token_freelist = tmp_arc_token_list + impl->arc_token_list_len;
1115
1116 FREE(impl->arc_token_list);
1117 impl->arc_token_insert_start = tmp_arc_token_list + (impl->arc_token_insert_start - impl->arc_token_list); //Rabih fix
1118 impl->arc_token_list = tmp_arc_token_list;
1119
1120 for (i = impl->arc_token_list_len; i < tmp_arc_token_list_len - 1; i++)
1121 {
1122 impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
1123 impl->arc_token_list[i].next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, (i + 1));
1124 }
1125 impl->arc_token_list[i].first_next_arc = ARC_TOKEN_NULL;
1126 impl->arc_token_list[i].next_token_index = ARC_TOKEN_NULL;
1127
1128 impl->arc_token_list_len = tmp_arc_token_list_len;
1129 tmp = arc_tokens_get_free(impl->arc_token_list, &(impl->arc_token_freelist));
1130 }
1131 #endif
1132 if(tmp == NULL) {
1133 PLogError(L("ESR_OUT_OF_MEMORY: Error adding more arcs to graph\n"));
1134 return ESR_OUT_OF_MEMORY;
1135 }
1136 impl->arcs_for_slot[slotID] = tmp;
1137 tmp->next_token_index = ARC_TOKEN_PTR2LNK(impl->arc_token_list, token);
1138 tmp->ilabel = wdID;
1139 tmp->olabel = (wordID)(impl->script_olabel_offset + scriptID);
1140 }
1141 else
1142 {
1143 old_scriptID = token->olabel - impl->script_olabel_offset;
1144
1145 if (!LSTRCMP(impl->scripts->words[old_scriptID], script))
1146 {
1147 /* nothing to do, we have the word, same meaning again so do nothing */
1148 }
1149 else
1150 {
1151
1152 CHKLOG(rc, make_union_of_scripts(union_script, sizeof(union_script), impl->scripts->words[old_scriptID], script));
1153
1154 #ifdef SREC_ENGINE_VERBOSE_LOGGING
1155 PLogMessage(L("Adding alternate meaning %s for word %s (%s) in slot %s\n"), script, word,
1156 impl->scripts->words[old_scriptID], impl->ilabels->words[slotID]);
1157 #endif
1158 /* add the union as if new (if not already there) */
1159 scriptID = wordmap_find_index(impl->scripts, union_script);
1160 if (scriptID == MAXwordID)
1161 scriptID = wordmap_add_word(impl->scripts, union_script);
1162 if (scriptID == MAXwordID)
1163 {
1164 PLogError(L("ESR_OUT_OF_MEMORY: Could not add script to wordmap"));
1165 return ESR_OUT_OF_MEMORY;
1166 }
1167
1168 /* make the olabel point to the union */
1169 token->olabel = (wordID)(impl->script_olabel_offset + scriptID);
1170 }
1171 }
1172 return ESR_SUCCESS;
1173 CLEANUP:
1174 return rc;
1175 }
1176
1177
1178 /**
1179 * Default implementation.
1180 */
SR_SemanticGraph_Reset(SR_SemanticGraph * self)1181 ESR_ReturnCode SR_SemanticGraph_Reset(SR_SemanticGraph* self)
1182 {
1183 struct SR_SemanticGraphImpl_t *impl = (struct SR_SemanticGraphImpl_t*) self;
1184 wordID slotid;
1185 arc_token* tmp;
1186 arc_token *tmp_arc_token_list;
1187
1188 wordmap_reset(impl->scopes_olabels);
1189 wordmap_reset(impl->scripts);
1190 wordmap_reset(impl->ilabels); //Rabih: I added this
1191 for (slotid = 1; slotid < impl->ilabels->num_slots; slotid++)
1192 {
1193 tmp = impl->arcs_for_slot[slotid];
1194 arc_tokens_free_list(impl->arc_token_list, &(impl->arc_token_freelist), tmp);
1195 impl->arcs_for_slot[slotid] = NULL;
1196 #if defined(SANITY_CHECK)
1197 int count;
1198 for (count = 0, tmp = impl->arc_token_freelist; tmp != NULL;
1199 tmp = ARC_TOKEN_PTR(impl->arc_token_list, tmp->next_token_index))
1200 {
1201 ASSERT(tmp->ilabel != 79324);
1202 tmp->ilabel = 79324;
1203 count++;
1204 }
1205 PLogError("after reset freelist size is %d", count);
1206 #endif
1207 }
1208
1209 // Rabih : Reset the arc_token_list
1210 if(impl->ilabels->num_words == impl->ilabels->num_base_words)
1211 {}
1212 else{
1213 impl->arc_token_list_len = (size_t)(impl->arc_token_insert_start - impl->arc_token_list);
1214 tmp_arc_token_list= NEW_ARRAY(arc_token,impl->arc_token_list_len, L("semgraph.wordgraph"));
1215 memcpy(tmp_arc_token_list,impl->arc_token_list, impl->arc_token_list_len*sizeof(arc_token));
1216
1217 impl->arc_token_freelist = NULL;
1218
1219 FREE(impl->arc_token_list);
1220 impl->arc_token_list = tmp_arc_token_list;
1221 }
1222 return ESR_SUCCESS;
1223 }
1224
serializeArcTokenInfoV2(SR_SemanticGraphImpl * impl,PFile * fp)1225 static ESR_ReturnCode serializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
1226 PFile* fp)
1227 {
1228 int i;
1229 asr_uint32_t idx;
1230 arcID tmp[32];
1231
1232 if (pfwrite(&impl->arc_token_list_len, 2, 1, fp) != 1)
1233 return ESR_WRITE_ERROR;
1234
1235 idx = PTR_TO_IDX(impl->arc_token_freelist, impl->arc_token_list);
1236
1237 if (pfwrite(&idx, 4, 1, fp) != 1)
1238 return ESR_WRITE_ERROR;
1239
1240 idx = PTR_TO_IDX(impl->arc_token_insert_start, impl->arc_token_list);
1241
1242 if (pfwrite(&idx, 4, 1, fp) != 1)
1243 return ESR_WRITE_ERROR;
1244
1245 idx = 0;
1246 if (pfwrite(&idx, 4, 1, fp) != 1)
1247 return ESR_WRITE_ERROR;
1248
1249 for (i = 0; i < impl->arc_token_list_len; ++i)
1250 {
1251 arc_token* token = &impl->arc_token_list[i];
1252 tmp[0] = token->ilabel;
1253 tmp[1] = token->olabel;
1254 tmp[2] = ARC_TOKEN_IDX(impl->arc_token_list, token->first_next_arc);
1255 tmp[3] = ARC_TOKEN_IDX(impl->arc_token_list, token->next_token_index);
1256 if (pfwrite(tmp, sizeof(tmp[0]), 4, fp) != 4)
1257 return ESR_WRITE_ERROR;
1258 }
1259
1260 /* new, fixes load/save bug 2007 July 31
1261 todo: change 4 to sizeof(asr_uint32) */
1262 if(1) {
1263 asr_uint32_t idx[MAX_NUM_SLOTS];
1264 for(i=0; i<MAX_NUM_SLOTS; i++)
1265 idx[i] = PTR_TO_IDX(impl->arcs_for_slot[i], impl->arc_token_list);
1266 if (pfwrite(&idx, 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS)
1267 return ESR_WRITE_ERROR;
1268 }
1269
1270 return ESR_SUCCESS;
1271 }
1272
deserializeArcTokenInfoV2(SR_SemanticGraphImpl * impl,PFile * fp)1273 static ESR_ReturnCode deserializeArcTokenInfoV2(SR_SemanticGraphImpl *impl,
1274 PFile* fp)
1275 {
1276 int i;
1277 asr_uint32_t idx;
1278 ESR_ReturnCode rc = ESR_SUCCESS;
1279 arcID tmp[32];
1280
1281 if (pfread(&impl->arc_token_list_len, 2, 1, fp) != 1)
1282 {
1283 rc = ESR_READ_ERROR;
1284 PLogError(L("ESR_READ_ERROR: could not read arc_token_list_len"));
1285 return rc;
1286 }
1287
1288 impl->arc_token_list = NEW_ARRAY(arc_token,
1289 impl->arc_token_list_len,
1290 L("semgraph.wordgraph"));
1291
1292 if (impl->arc_token_list == NULL)
1293 {
1294 rc = ESR_OUT_OF_MEMORY;
1295 PLogError(ESR_rc2str(rc));
1296 return ESR_OUT_OF_MEMORY;
1297 }
1298
1299 if (pfread(&idx, 4, 1, fp) != 1)
1300 {
1301 rc = ESR_READ_ERROR;
1302 PLogError(ESR_rc2str(rc));
1303 goto CLEANUP;
1304 }
1305
1306 impl->arc_token_freelist = IDX_TO_PTR(idx, impl->arc_token_list);
1307
1308 if (pfread(&idx, 4, 1, fp) != 1)
1309 {
1310 rc = ESR_READ_ERROR;
1311 PLogError(ESR_rc2str(rc));
1312 goto CLEANUP;
1313 }
1314
1315 impl->arc_token_insert_start = IDX_TO_PTR(idx, impl->arc_token_list);
1316 // impl->arc_token_insert_start = impl->arc_token_list + impl->arc_token_list_len; // Rabih's fix
1317
1318 if (pfread(&idx, 4, 1, fp) != 1)
1319 {
1320 rc = ESR_READ_ERROR;
1321 PLogError(ESR_rc2str(rc));
1322 goto CLEANUP;
1323 }
1324 impl->arc_token_insert_end = 0;
1325
1326 for (i = 0; i < impl->arc_token_list_len; ++i)
1327 {
1328 arc_token* token = &impl->arc_token_list[i];
1329 if (pfread(tmp, sizeof(tmp[0]), 4, fp) != 4)
1330 {
1331 rc = ESR_READ_ERROR;
1332 goto CLEANUP;
1333 }
1334 token->ilabel = tmp[0];
1335 token->olabel = tmp[1];
1336 if (tmp[2] == MAXarcID)
1337 token->first_next_arc = ARC_TOKEN_NULL;
1338 else
1339 token->first_next_arc = ARC_TOKEN_LNK(impl->arc_token_list, tmp[2]);
1340 if (tmp[3] == MAXarcID)
1341 token->next_token_index = ARC_TOKEN_NULL;
1342 else
1343 token->next_token_index = ARC_TOKEN_LNK(impl->arc_token_list, tmp[3]);
1344 }
1345
1346 /* new, fixes load/save bug 2007 July 31
1347 todo: change 4 to sizeof(asr_uint32) */
1348 if(1) {
1349 asr_uint32_t idx[MAX_NUM_SLOTS];
1350 if (pfread(&idx[0], 4, MAX_NUM_SLOTS, fp) != MAX_NUM_SLOTS) {
1351 rc = ESR_READ_ERROR;
1352 PLogError(ESR_rc2str(rc));
1353 goto CLEANUP;
1354 }
1355 for(i=0; i<MAX_NUM_SLOTS; i++)
1356 impl->arcs_for_slot[i] = IDX_TO_PTR(idx[i], impl->arc_token_list);
1357 }
1358
1359 return ESR_SUCCESS;
1360
1361 CLEANUP:
1362 FREE(impl->arc_token_list);
1363 impl->arc_token_list =
1364 impl->arc_token_freelist =
1365 impl->arc_token_insert_start =
1366 impl->arc_token_insert_end = NULL;
1367 return rc;
1368 }
1369