• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  grxmldoc.cpp  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include <assert.h>
21 #include <stdlib.h>
22 #include <fstream>
23 #include <sstream>
24 #include <iostream>
25 #include <algorithm> // for std::sort
26 #include "tinyxml.h"
27 #include "grph.h"       // The word graph object and interface
28 #include "sub_grph.h"	// The sub-graph object and interface
29 #include "hashmap.h"
30 #include "grxmldoc.h"
31 #include "ESR_Session.h"
32 //#include "LCHAR.h"
33 
// Compile-time switch for the diagnostics in this file: a non-zero value
// selects the printing DEBUG_PRINT below and enables the other
// "#if GRXML_DEBUG" sections.
#define GRXML_DEBUG 0
#define MAX_PATH_NAME 512

// Prints x to std::cout and then terminates the process with exit code y.
#define FATAL_ERROR(x,y) { std::cout << (x) << std::endl; exit ((y)); }
// Prints x to std::cout. NB the expansion already ends with a ';'.
#define WARNING(x) std::cout << (x) << std::endl;

#if GRXML_DEBUG
//#define DEBUG_PRINT(x) //
// Debug build: DEBUG_PRINT writes x to std::cout, PRINT_EXPRESSION is empty.
#define DEBUG_PRINT(x) std::cout << (x) << std::endl;
#define PRINT_EXPRESSION(x)
//#define PRINT_EXPRESSION(x) std::cout << (x) << std::endl;
#else
// Non-debug build: the trailing "//" is removed as a comment before the macro
// is recorded, so both macros expand to nothing and their argument is never
// evaluated.
#define DEBUG_PRINT(x) //
#define PRINT_EXPRESSION(x) //

#endif

using namespace std;

// Prints "ERROR: Empty string of type <t>" to std::cout when s.empty().
// It only reports; execution continues afterwards.
#define CHECK_NOT_EMPTY(s, t) { if (s.empty()) \
				{ \
				std::cout << "ERROR: Empty string of type "  << t <<std::endl; \
				} \
			     }
58 
// Returns true when text is a non-empty run of decimal digits only.
static bool is_decimal_count(const std::string& text)
{
  return !text.empty() &&
         text.find_first_not_of("0123456789") == std::string::npos;
}

// Parses a repeat/count range specification.
//
// Accepted forms:
//   "n-m"  -> *minCnt = n, *maxCnt = m
//   "n-"   -> *minCnt = n, *maxCnt = -1 (no upper bound given)
//   "n+"   -> *minCnt = n, *maxCnt = -1 (text after the '+' is ignored)
//   "n"    -> *minCnt = *maxCnt = n
//
// Every numeric field must consist entirely of decimal digits; previously
// only the first character was checked, so input such as "3x" was accepted
// and silently truncated by atoi().
//
// Returns 0 on success and 1 when the text is malformed. On failure the
// outputs may have been partially written (e.g. "2-x" sets *minCnt to 2 and
// *maxCnt to -1 before returning 1).
int get_range(const std::string& s, int* minCnt, int* maxCnt)
{
  const std::string::size_type dash = s.find('-');
  if (dash != std::string::npos) {
    const std::string lower(s, 0, dash);
    if (!is_decimal_count(lower)) return 1;
    *minCnt = atoi(lower.c_str());
    *maxCnt = -1;    // stays -1 when no maximum follows the '-'
    const std::string upper(s, dash + 1);
    if (!upper.empty()) {
      if (!is_decimal_count(upper)) return 1;
      *maxCnt = atoi(upper.c_str());
    }
    return 0;
  }

  const std::string::size_type plus = s.find('+');
  if (plus != std::string::npos) {
    const std::string lower(s, 0, plus);
    if (!is_decimal_count(lower)) return 1;
    *minCnt = atoi(lower.c_str());
    *maxCnt = -1;
    return 0;
  }

  if (!is_decimal_count(s)) return 1;
  *minCnt = *maxCnt = atoi(s.c_str());
  return 0;
}
88 
GRXMLDoc()89 GRXMLDoc::GRXMLDoc()
90 {
91     m_NodeKeyWords.insert(make_pair("grammar", NodeTypeGrammar));
92     m_NodeKeyWords.insert(make_pair("rule", NodeTypeRule));
93     m_NodeKeyWords.insert(make_pair("ruleref", NodeTypeRuleReference));
94     m_NodeKeyWords.insert(make_pair("one-of", NodeTypeOneOf));
95     m_NodeKeyWords.insert(make_pair("item", NodeTypeItem));
96     m_NodeKeyWords.insert(make_pair("tag", NodeTypeTag));
97     m_NodeKeyWords.insert(make_pair("count", NodeTypeCount));
98     m_NodeKeyWords.insert(make_pair("meta", NodeTypeMeta));
99     m_pGraph = 0;
100     m_RuleAutoIndex = 0;
101     m_TagAutoIndex = 0;
102     m_LabelAutoIndex = 0;
103     m_ExpandedRulesAutoIndex = 0;
104     m_XMLFileName = "dummy.xml";
105 }
106 
107 
~GRXMLDoc()108 GRXMLDoc::~GRXMLDoc()
109 {
110     deleteRules();
111     if (m_pGraph) {
112         delete m_pGraph;
113     }
114 }
115 
116 
parseGrammar(XMLNode & node,std::string & xMLFileName)117 bool GRXMLDoc::parseGrammar( XMLNode &node, std::string & xMLFileName )
118 {
119     m_XMLFileName = xMLFileName;
120     // Set up the internally defined rules, etc.
121     initializeLists();
122     // The top level "document" node is given to this fn
123     // Create the container for the word graph.
124     if (m_pGraph) {
125         delete m_pGraph;
126     }
127     m_pGraph = new Graph("XML grammar");
128     SubGraph *p_SubGraph;
129 
130     parseNode( node, p_SubGraph, 1 );     // NB Subgraph pointed to will change in recursive fn.
131 
132     if (findSubGraph( m_RootRule, p_SubGraph )) {
133 	m_pGraph->ExpandRules (p_SubGraph);
134 	p_SubGraph->RemoveInternalConnections ();
135 	//Print the root rule.
136 	//printSubgraph( *p_SubGraph );
137     }
138     return true;
139 }
140 
141 
parseNode(XMLNode & node,SubGraph * & p_SubGraph,const unsigned int level)142 bool GRXMLDoc::parseNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level )
143 {
144     // We will create a new subgraph for each rule node.
145     // The "current" subgraph is substituted with the new subgraph for all ops on child nodes.
146     // After processing child nodes the original subgraph is reinstated
147     // for final operations in the endNode() fn.
148 
149     // Initial processing of the current node before processing children
150 #if 0 && GRXML_DEBUG
151 	if(node.Type() == TiXmlNode::ELEMENT)
152 		node.ToElement()->Print( stdout, level);
153 	else if(node.Type() == TiXmlNode::DOCUMENT)
154 		node.ToDocument()->Print( stdout, level);
155 	else if(node.Type() == TiXmlNode::TEXT)
156 		node.ToText()->Print( stdout, level);
157 	else if(node.Type() == TiXmlNode::DECLARATION)
158 		node.ToDeclaration()->Print( stdout, level);
159 	else {
160 		const char* text = node.Value();
161 		if(!text) text = "__NULL__";
162 		printf("processing node type %d text %s\n", node.Type(), text);
163 	}
164 #endif
165     beginNode( node, p_SubGraph, level );
166 
167     SubGraph *p_LocalSubGraph;
168     p_LocalSubGraph = p_SubGraph;
169 	TiXmlNode* child;
170 	for( child = node.FirstChild(); child; child = child->NextSibling() )
171     {
172 		parseNode ( *child, p_SubGraph, level+1 );
173     }
174     // Revert current node
175     p_SubGraph = p_LocalSubGraph;
176 
177     // Finish processing current node
178     endNode( node, p_SubGraph, level );
179 
180     return true;
181 } // parseNode
182 
183 
beginNode(XMLNode & node,SubGraph * & p_SubGraph,const unsigned int level)184 bool GRXMLDoc::beginNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level )
185 {
186     std::string name = node.Value();
187     DEBUG_PRINT("Element = " + name);
188 
189     // XMLNode::Type type = node.getType();
190     if ( node.Type() == TiXmlNode::TEXT) // isCData()
191     {
192       const char* cc_name = node.Parent()->Value();
193       std::string str_name(cc_name);
194       DEBUG_PRINT (std::string("CDATA ") + name);
195       DEBUG_PRINT (std::string("CDATA ") + str_name);
196 
197       processCDATA( node, p_SubGraph );
198     }
199     else if ( node.Type()== TiXmlNode::ELEMENT /*isNode()*/ || node.NoChildren() /*isLeaf()*/)
200       {
201 	//printNode(node, level);
202 	// Use enum value
203 	KEYWDPAIR::iterator pos;
204 	pos = m_NodeKeyWords.find( name );
205 	KeywordValues nodeType = NodeTypeBadValue;
206 	if ( pos != m_NodeKeyWords.end() )
207 	{
208 	    nodeType = (*pos).second;
209 	    DEBUG_PRINT("nodeType=" + nodeType);
210 	} else if(node.Type() == TiXmlNode::COMMENT) {
211 		return true;
212 	} else if(node.Type() == TiXmlNode::DECLARATION && name.length()==0) {
213 		return true;
214 	} else {
215 	  FATAL_ERROR( std::string("Error: unknown tag ") + name, ESR_INVALID_ARGUMENT);
216 	}
217 
218 	switch ( nodeType )
219 	{
220 	case NodeTypeGrammar:
221 	    {
222 		beginParseGrammarNode( node );
223 	    }
224 	    break;
225 	case NodeTypeRule:
226 	    {
227 		// NB This fn creates a new subgraph.
228 		beginParseRuleNode( node, p_SubGraph );
229 	    }
230 	    break;
231 	    case NodeTypeRuleReference:
232 	    {
233 		// NB This fn creates a new subgraph.
234 		beginRuleRef( node, p_SubGraph );
235 	    }
236 	    break;
237 	    case NodeTypeOneOf:
238 	    {
239 		beginOneOf( node, p_SubGraph );
240 	    }
241 	    break;
242 	    case NodeTypeItem:
243 	    {
244 		beginItem( node, p_SubGraph );
245 	    }
246 	    break;
247 	    case NodeTypeTag:
248 	    {
249 		beginTag( node, p_SubGraph );
250 	    }
251 	    break;
252 	    case NodeTypeCount:
253 	    {
254 		beginCount( node, p_SubGraph );
255 	    }
256 	    break;
257 	    case NodeTypeMeta:
258 	    {
259 	        beginParseMetaNode( node );
260 	    }
261 	    break;
262 	    case NodeTypeBadValue:
263 	    default:
264 		DEBUG_PRINT( "UNKNOWN node name: " + name );
265 	    break;
266 	}; // switch
267     } //is a Node or Leaf
268     else if ( node.Type() == TiXmlNode::TEXT) // isCData()
269       {
270 	DEBUG_PRINT (std::string("CDATA ") + name);
271 	processCDATA( node, p_SubGraph );
272     }
273     return true;
274 } // beginNode()
275 
276 
endNode(XMLNode & node,SubGraph * & p_SubGraph,const unsigned int level)277 bool GRXMLDoc::endNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level )
278 {
279     std::string name = node.Value();
280     //XMLNode::Type type = node.getType();
281 
282     if ( node.Type()== TiXmlNode::ELEMENT /*isNode()*/ || node.NoChildren() )
283     {
284 	KEYWDPAIR::iterator pos;
285 	pos = m_NodeKeyWords.find( name );
286 	KeywordValues nodeType = NodeTypeBadValue;
287 	if ( pos != m_NodeKeyWords.end() )
288 	{
289 	    nodeType = (*pos).second;
290 	}  else if(node.Type() == TiXmlNode::COMMENT) {
291 		return true;
292 	} else if(node.Type() == TiXmlNode::DECLARATION && name.length()==0) {
293 		return true;
294 	} else if(node.Type() == TiXmlNode::TEXT) {
295 
296 	} else {
297 	  FATAL_ERROR( std::string("Error: unknown tag ") + name, ESR_INVALID_ARGUMENT );
298 	}
299 
300 	switch ( nodeType )
301 	{
302 	case NodeTypeGrammar:
303 	{
304 	    endParseGrammarNode( node );
305 	}
306 	break;
307 	case NodeTypeRule:
308 	{
309 	    endParseRuleNode( node, p_SubGraph );
310 	}
311 	break;
312 	case NodeTypeRuleReference:
313 	{
314 	    endRuleRef( node, p_SubGraph );
315 	}
316 	break;
317 	case NodeTypeOneOf:
318 	{
319 	    endOneOf( node, p_SubGraph );
320 	}
321 	break;
322 	case NodeTypeItem:
323 	{
324 	    endItem(node, p_SubGraph );
325 	}
326 	break;
327 	case NodeTypeTag:
328 	{
329 	    endTag( node, p_SubGraph );
330 	}
331 	break;
332 	case NodeTypeCount:
333 	{
334 	    endCount( node, p_SubGraph );
335 	}
336 	break;
337         case NodeTypeMeta:
338 	{
339             endParseMetaNode( node );
340 	}
341 	break;
342 	case NodeTypeBadValue:
343 	default:
344 	    DEBUG_PRINT( "UNKNOWN node name: ");
345 	    DEBUG_PRINT( name.c_str() );
346 	//Extend the
347 	break;
348 	}; // switch
349     } //isNode() or isLeaf()
350     else
351     {
352 	// Do nothing?
353     }
354     return true;
355 } // endNode()
356 
357 
beginParseGrammarNode(XMLNode & node)358 bool GRXMLDoc::beginParseGrammarNode(XMLNode &node)
359 {
360 	const char* attr;
361 #define GETATTR(nAmE) ((attr=node.ToElement()->Attribute(nAmE))!=NULL) ? attr:""
362 	m_XMLMode      = GETATTR("mode");
363 	m_XMLLanguage  = GETATTR("xml:lang");
364     m_RootRule     = GETATTR("root");	// The root rule name
365 
366     DEBUG_PRINT("Root rule = " + m_RootRule);
367 
368     m_XMLTagFormat = GETATTR("tag-format");
369     m_XMLVersion   = GETATTR("version");
370     m_XMLBase      = GETATTR("xml:base");
371     return true;
372 }
373 
beginParseMetaNode(XMLNode & node)374 bool GRXMLDoc::beginParseMetaNode(XMLNode &node)
375 {
376   const char* attr;
377   std::string meta_name  = GETATTR("name");
378   std::string meta_value = GETATTR("content");
379 
380   if(meta_name == "word_penalty") {
381     m_MetaKeyValPairs.insert(meta_name,meta_value);
382     // m_MetaKeyValPairs.print();
383   } else if(meta_name == "do_skip_interword_silence") {
384     for(int j = 0; j<(int)meta_value.size(); j++){
385       meta_value[j] = tolower(meta_value[j]); //lower();
386     }
387     if(meta_value!="true" && meta_value!="false")
388       printf ("\nWarning: %s must be set to 'true' or 'false'; defaulting to 'false'\n", meta_name.c_str());
389     else
390       m_MetaKeyValPairs.insert(meta_name,meta_value);
391   } else if(meta_name == "userdict_name") {
392     printf ("\nWarning: ignoring unsupported meta %s %s\n", meta_name.c_str(), meta_value.c_str());
393   } else {
394     printf ("\nWarning: ignoring unsupported meta %s %s\n", meta_name.c_str(), meta_value.c_str());
395   }
396   return true;
397 }
398 
399 
endParseGrammarNode(XMLNode & node)400 bool GRXMLDoc::endParseGrammarNode(XMLNode &node)
401 {
402     // End parse operations
403     return true;
404 }
405 
406 
beginParseRuleNode(XMLNode & node,SubGraph * & p_SubGraph)407 bool GRXMLDoc::beginParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph)
408 {
409 	const char* attr;
410     // Note: The subGraph may change if there are forward references. This
411     // is fine as we revert to the previous one when finished parsing the current node.
412     DEBUG_PRINT ( "---- Rule\n" );
413     std::string ruleName = GETATTR("id" );
414     std::string s_tag    = GETATTR("tag" );
415     if( s_tag.length()>0) {
416       FATAL_ERROR("Error: unsupported tag= syntax, use <tag> ... </tag>", 1)
417     }
418     CHECK_NOT_EMPTY( ruleName, "id" );
419     // Rule name must be unique within scope of entire grammar.
420     // Put rule on stack - for context
421     m_RuleListStack.push( ruleName );
422 
423     // Check whether a ruleref placeholder exists for this rule.
424     int index;
425     bool foundRule = findRuleIndex( ruleName, index );
426     if (foundRule) {
427 	// Rule is already declared; it must have been forward referenced
428 	// so swap the placeholder subgraph in.
429 	// NB subgraph and rule name are already known to lists.
430 	SubGraph *p_ExistingSubgraph;
431 	if ( findSubGraph( ruleName, p_ExistingSubgraph ) ) {
432 	    p_SubGraph = p_ExistingSubgraph;
433 	}
434 	else {
435 	    FATAL_ERROR("ERROR! Subgraph without rule name entry found!", -1);
436         }
437     }
438     else {
439 	// Create a Word Graph node for each rule node
440 	SubGraph *newGraph;
441 	addRuleToList( ruleName, newGraph );
442 	p_SubGraph = newGraph;
443     }
444 
445     // Make a note of the scope or rules; public, etc - used in map file.
446     findRuleIndex( ruleName, index );
447     std::string ruleScope = GETATTR("scope" );
448     if ( !ruleScope.empty() ) {
449         m_RuleScope.insert(index, ruleScope);
450     }
451 
452     // We must accommodate Rules that have CDATA without an <item> element.
453     // We need to infer this element for all rules.
454     m_pGraph->BeginItem( p_SubGraph );
455 
456     PRINT_EXPRESSION( ruleName + " = { " );
457     return true;
458 } // beginParseRuleNode()
459 
460 
endParseRuleNode(XMLNode & node,SubGraph * & p_SubGraph)461 bool GRXMLDoc::endParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph )
462 {
463     // The rule expression has been built as a subgraph and ID added to the rule list.
464     // Finished editing subgraph
465     DEBUG_PRINT ( "---- /Rule\n" );
466     //m_pGraph->EndRule(&p_SubGraph);
467     // Tell the world
468     //std::string ruleName = attr.get( "id" );
469     std::string ruleName = m_RuleListStack.top();
470     m_RuleListStack.pop();
471     //CHECK_NOT_EMPTY( ruleName, "id" );
472     // Must be unique rule name within scope of entire grammar.
473     // Check whether a ruleref placeholder exists for this rule.
474     m_pGraph->addSubGraph ( p_SubGraph );
475 
476     // We must accommodate Rules that have CDATA without an <item> element.
477     // We need to infer this element for all rules.
478     m_pGraph->EndItem( p_SubGraph );
479 
480     PRINT_EXPRESSION( " }\n" );
481     return true;
482 }
483 
processCDATA(XMLNode & node,SubGraph * & p_SubGraph)484 bool GRXMLDoc::processCDATA( XMLNode &node, SubGraph *&p_SubGraph )
485 {
486     // Note the Item's CDATA
487     // Strip leading and trailing whitespace
488     const char* cc_name = node.Parent()->Value();
489     std::string str_name(cc_name); // = node.Parent()->ValueStr(); // getName
490     // std::string name = node.Parent()->Value(); // getName
491     //if ( name == "item" ) {
492     if ( str_name != "tag" ) {
493 
494 	const char* const whitespace = " \t\r\n\v\f";
495 	std::string cdata = node.Value(); // getCData()
496 	std::string word; // Words are whitespace separated
497 
498 	cdata.erase(0, cdata.find_first_not_of(whitespace) );
499 	cdata.erase(cdata.find_last_not_of(whitespace) + 1);
500 #if GRXML_DEBUG
501         std::cout << "/--" << cdata << "--/\n";
502 #endif
503 
504 	std::string::size_type begIdx, endIdx;
505 
506         //search beginning of the first word
507         begIdx = cdata.find_first_not_of(whitespace);
508 
509         //while beginning of a word found
510 	while (begIdx != std::string::npos) {
511             //search end of the actual word
512             endIdx = cdata.find_first_of (whitespace, begIdx);
513             if (endIdx == string::npos) {
514                 //end of word is end of line
515                 endIdx = cdata.length();
516             }
517             word.clear();
518 	    // word.assign(cdata,begIdx,endIdx);
519 	    word.append (cdata, begIdx, endIdx - begIdx);
520 	    if ( !word.empty() )
521 	    {
522 #if GRXML_DEBUG
523 		std::cout << " -->" << word << "<--\n";
524 #endif
525 		int index;
526 		// If a slot then take note of rule name
527 		if ( IsSlot( word ) ) {
528 		  const char* xmlBasename;
529 		  std::string ruleName = m_RuleListStack.top();
530 		  m_SlotList.insert(index, ruleName);
531 		  xmlBasename = strrchr(m_XMLFileName.c_str(),'/');
532 		  xmlBasename = xmlBasename ? xmlBasename+1 : m_XMLFileName.c_str();
533 		  word = (std::string)xmlBasename + "." + ruleName + "@" + word;
534 		  addLabelToList( word );
535 		  findLabelIndex( word, index );
536 		} else {
537 		  addLabelToList( word );
538 		  findLabelIndex( word, index );
539 		}
540 		m_pGraph->AddLabel( p_SubGraph, index );
541 	    }
542 	    begIdx = cdata.find_first_not_of (whitespace, endIdx);
543 
544 	}
545     } //tag
546     else {
547 	// Do nothing with CDATA for elements that are not items.
548 	// In particular, do not strip whitespace from tag cdata.
549 	// However, CPPDOM appears to remove linefeeds. May need to tidy up.
550 
551     }
552     return true;
553 } // cdata
554 
beginItem(XMLNode & node,SubGraph * & p_SubGraph)555 bool GRXMLDoc::beginItem( XMLNode &node, SubGraph *&p_SubGraph )
556 {
557 	const char* attr;
558     DEBUG_PRINT ("---- Item:\n");
559     // First check whethere there is a count/repeat
560     std::string s     = GETATTR("repeat" );
561     int minCnt=0,maxCnt=0;
562     std::string s_tag = GETATTR("tag" );
563     if( s_tag.length()>0) {
564       FATAL_ERROR("Error: unsupported tag= syntax, use <tag> ... </tag>", 1)
565     }
566     if( s.length()>0 && get_range( s, &minCnt, &maxCnt) ) {
567       FATAL_ERROR(std::string("error: while parsing range ") + s,1);
568     }
569     if ( !s.empty() ) {
570       // RED FLAG: max should not be 0! A +ve number should have been given.
571       if( maxCnt>0) {
572 	m_pGraph->BeginCount( p_SubGraph, minCnt, maxCnt );
573       }
574       else {
575 	// NB: BeginItemRepeat  can only use min of 0 or 1!
576 	m_pGraph->BeginItemRepeat ( p_SubGraph, minCnt, -1);
577       }
578     }
579     else {
580 	m_pGraph->BeginItem( p_SubGraph );
581     }
582     return true;
583 }
584 
585 
endItem(XMLNode & node,SubGraph * & p_SubGraph)586 bool GRXMLDoc::endItem( XMLNode &node, SubGraph *&p_SubGraph )
587 {
588     DEBUG_PRINT ( "---- /Item\n" );
589 
590     // What TODO if no tag for an item?
591 
592     m_pGraph->EndItem( p_SubGraph );
593     return true;
594 }
595 
596 
beginRuleRef(XMLNode & node,SubGraph * & p_SubGraph)597 bool GRXMLDoc::beginRuleRef( XMLNode &node, SubGraph *&p_SubGraph )
598 {
599     // Extend word FST node with an entire FST subgraph.
600     // Forward referencing of rules is supported.
601     // NB Remove the leading # from the ruleref name!
602     DEBUG_PRINT ( "---- Ruleref\n" );
603 
604 	const char* attr;
605     std::string s_tag = GETATTR("tag" );
606     if( s_tag.length()>0) {
607       FATAL_ERROR("Error: unsupported tag= syntax, use <tag> ... </tag>", 1)
608     }
609     std::string s = GETATTR("uri" );
610     if (s.empty())
611     {
612 	//
613 	FATAL_ERROR( "ERROR! Ruleref specifies no uri name!", -1 );
614     }
615     // Remove the #:
616     int p1 = s.find("#");
617     if ( p1 !=0 ) {
618 	FATAL_ERROR( "ERROR! bad ruleref name: '" + s + "'" + ". Rule reference must start with a '#'. External references are not supported.", -1 );
619     }
620     string ruleName;
621     getRuleRefName( node, ruleName );
622 
623     //std::string parentRuleName = m_RuleListStack.top();
624     //addRuleDependency( parentRuleName, ruleName );
625 
626     int index;
627     bool foundRule = findRuleIndex( ruleName, index );
628     if (!foundRule) {
629 	// Forward reference; create a placeholder subgraph ptr.
630 	//SubGraph *newGraph = new SubGraph( (char *) ruleName.c_str() );
631 	// RED FLAG:  Remember to check fwd ref rule was filled in at end.
632 	SubGraph *newGraph;
633 	addRuleToList( ruleName, newGraph );
634 	findRuleIndex( ruleName, index );
635     }
636     // We can now treat a forward-referenced graph as if it was defined.
637     // We will add the subgraph when we have the tag - see endItem().
638     m_pGraph->BeginRule( p_SubGraph );
639     m_pGraph->AddRuleRef( p_SubGraph, index );
640     m_pGraph->EndRule( p_SubGraph );
641 
642     return true;
643 }
644 
645 
endRuleRef(XMLNode & grmNode,SubGraph * & p_SubGraph)646 bool GRXMLDoc::endRuleRef(XMLNode &grmNode, SubGraph *&p_SubGraph )
647 {
648     DEBUG_PRINT ( "---- /Ruleref\n" );
649     // Does nothing
650     // NB The tag is not under the ruleref element - it is in the current item element.
651     // We now add the tag of the AddRuleRef as we see the tag element. See EndTag().
652 
653     return true;
654 }
655 
656 
beginOneOf(XMLNode & grmNode,SubGraph * & p_SubGraph)657 bool GRXMLDoc::beginOneOf(XMLNode &grmNode, SubGraph *&p_SubGraph)
658 {
659     DEBUG_PRINT ( "----OneOf\n" );
660     m_pGraph->BeginOneOf (p_SubGraph);
661     return true;
662 }
663 
664 
endOneOf(XMLNode & grmNode,SubGraph * & p_SubGraph)665 bool GRXMLDoc::endOneOf(XMLNode &grmNode, SubGraph *&p_SubGraph)
666 {
667     DEBUG_PRINT ( "----/OneOf\n" );
668     m_pGraph->EndOneOf (p_SubGraph);
669     return true;
670 }
671 
672 
beginTag(XMLNode & node,SubGraph * & p_SubGraph)673 bool GRXMLDoc::beginTag( XMLNode &node, SubGraph *&p_SubGraph )
674 {
675     DEBUG_PRINT ("---- Tag\n");
676     std::string s = node.ToElement()->GetText(); // getCdata();
677 #if GRXML_DEBUG
678     std::cout << s;     // debug
679 #endif
680     // Store the semantic tag info.
681     // NB Do not strip whitespace from tag cdata
682     if ( !s.empty() )
683     {
684 	int index;
685 	addTagToList( s );
686 	findTagIndex( s, index );
687 	m_pGraph->AddTag ( p_SubGraph, index );
688     }
689 
690     return true;
691 }
692 
693 
endTag(XMLNode & node,SubGraph * & p_SubGraph)694 bool GRXMLDoc::endTag( XMLNode &node, SubGraph *&p_SubGraph )
695 {
696     DEBUG_PRINT ("---- /Tag\n");
697     return true;
698 }
699 
700 
beginCount(XMLNode & node,SubGraph * & p_SubGraph)701 bool GRXMLDoc::beginCount( XMLNode &node, SubGraph *&p_SubGraph )
702 {
703 	const char* attr;
704     // Count of reps applies to the text elements in this count node
705     DEBUG_PRINT ("---- Count\n");
706     // Get number attr
707     std::string s     = GETATTR("number");
708     std::string s_tag = GETATTR("tag" );
709     if( s_tag.length()>0) {
710       FATAL_ERROR("Error: unsupported tag= syntax, use <tag> ... </tag>", 1)
711     }
712     if (s.empty()) {
713 		return false;
714     }
715     // not  in subgraph but in graph?!
716     //graph.BeginCount(n);
717 
718     int minCnt=-1, maxCnt=-1;
719     if( get_range( s, &minCnt, &maxCnt) ) {
720       FATAL_ERROR(std::string("error: while parsing range ") + s,1);
721     }
722     if ( s.c_str() == std::string("optional") )
723     {
724 	m_pGraph->BeginOptional( p_SubGraph );
725     }
726     else if ( minCnt>0 && maxCnt>0)
727     {
728 	m_pGraph->BeginCount( p_SubGraph, minCnt, maxCnt );
729     }
730     else if( minCnt>0 )
731       {
732 	m_pGraph->BeginItemRepeat ( p_SubGraph, minCnt, -1);
733       }
734     else { //
735     	m_pGraph->BeginOptional ( p_SubGraph );
736     }
737 
738     return true;
739 }
740 
741 
endCount(XMLNode & node,SubGraph * & p_SubGraph)742 bool GRXMLDoc::endCount( XMLNode &node, SubGraph *&p_SubGraph )
743 {
744     DEBUG_PRINT ("---- /Count\n");
745     m_pGraph->EndCount( p_SubGraph );
746     return true;
747 }
748 
endParseMetaNode(XMLNode & node)749 bool GRXMLDoc::endParseMetaNode(XMLNode &node)
750 {
751   // End parse operations
752   return true;
753 }
754 
printNode(XMLNode & node,int level)755 void GRXMLDoc::printNode(XMLNode &node, int level)
756 {
757     std::string name = node.Value();
758     int type = node.Type();
759     std::string c_data;
760 
761     for(int i=0;i<level;i++) std::cout << " ";
762 
763     char c = ' ';
764     switch(type)
765     {
766     case TiXmlNode::ELEMENT:
767 	// case XMLNode::xml_nt_node: // grammar, rule, one-of, item, count
768 	 c = '+';
769 	 break;
770 	/* case TiXmlNode::TEXT:
771 	// case XMLNode::xml_nt_leaf:
772 	c = '-';
773 	break; */
774     case TiXmlNode::DOCUMENT:
775     // case XMLNode::xml_nt_document:
776 	c = '\\';
777 	break;
778     case TiXmlNode::TEXT:
779     // case XMLNode::xml_nt_cdata:
780 	c = '#';
781 	c_data = node.Value(); // getCdata();
782 	break;
783 	case TiXmlNode::UNKNOWN:
784 	case TiXmlNode::COMMENT:
785 	case TiXmlNode::TYPECOUNT:
786 	case TiXmlNode::DECLARATION:
787 	default:
788 		std::cout << "Error: not sure what to do here" << std::endl;
789 		break;
790     }
791 	if(node.Type() == TiXmlNode::TEXT)  // isCData()
792 	  std::cout << c << name.c_str() << "[" << c_data << "]" << std::endl;
793 	//Extend the tag hashtable
794     else
795 	  std::cout << c << name.c_str() << std::endl;
796 
797 	if( node.Type() == TiXmlNode::ELEMENT) {
798 
799 		for(TiXmlAttribute* attr=node.ToElement()->FirstAttribute();
800 			attr; attr=attr->Next() ) {
801 
802 		  // guru: added output of attributes
803 			for (int i=0; i<level; i++)
804 				std::cout << " ";
805 			std::cout << "   ";
806 			std::cout << attr->Name() << ": " << attr->Value() << std::endl;
807 		}
808 	}
809 
810 }
811 
812 /** Function: addRuleToList
813     Extends list of SubGraphs with given subGraph
814     and extends list of rule names too.
815     TODO: Can we use one hash and use internal numeric index for rule IDs?
816 */
817 
818 
addRuleToList(std::string const & ruleName,SubGraph * & p_SubGraph)819 bool GRXMLDoc::addRuleToList(std::string const & ruleName, SubGraph *&p_SubGraph)
820 {
821     int index;
822     if ( findRuleIndex ( ruleName, index ) ) {
823 	FATAL_ERROR("ERROR! Rule name " + ruleName + " is already defined!", -1 );
824     }
825 
826     addLabelToList( m_XMLFileName + "@" + ruleName);
827     findLabelIndex( m_XMLFileName + "@" + ruleName, index );
828 #if GRXML_DEBUG
829     std::cout << "Rule " << ruleName << std::endl;
830 #endif
831     // Create the new subgraph and update lists
832     m_RuleList.insert( ruleName, index );
833     p_SubGraph = new SubGraph( (char *) ruleName.c_str(), index );
834 
835     bool success = m_SubgraphList.insert( ruleName, p_SubGraph );
836     if (!success) {
837 	FATAL_ERROR("ERROR! subgraph for " + ruleName + " is already defined!", -1 );
838     }
839 #if ADD_BRACES
840     addLabelToList( "{" );
841     std::stringstream  ss;
842     ss << "}(" << index << ")";
843     addLabelToList( ss.str());
844 #endif
845     return success;
846 }
847 
848 
deleteRules()849 bool GRXMLDoc::deleteRules()
850 {
851     // Delete all allocated subgraphs.
852     // The rule strings are part of the hashtables and get deleted by them.
853     int index;
854     SubGraph *p_SubGraph;
855     std::string ruleName;
856     while ( !m_RuleList.isEmpty() ) {
857 	m_RuleList.getFirst( &ruleName, &index );
858 	m_RuleList.remove( ruleName );
859 	if (m_SubgraphList.getValue( ruleName, &p_SubGraph ) ) {
860 	    delete p_SubGraph;
861 	}
862 	else {
863 	    FATAL_ERROR("No subgraph for rule " + ruleName + "! Mismatched rules and subgraph hashtables!", -1);
864 	}
865     }
866     m_SubgraphList.clear();
867     m_RuleList.clear();
868     m_LabelList.clear();
869     m_TagList.clear();
870     return true;
871 }
872 
findSubGraph(std::string & s,SubGraph * & p_SubGraph)873 bool GRXMLDoc::findSubGraph(std::string & s, SubGraph *&p_SubGraph)
874 {
875     return m_SubgraphList.getValue(s, &p_SubGraph);
876 }
877 
findRule(int i,std::string & s)878 bool GRXMLDoc::findRule(int i, std::string &s )
879 {
880     return m_RuleList.getIndex( i, &s );
881 }
882 
findTag(int i,std::string & s)883 bool GRXMLDoc::findTag(int i, std::string &s )
884 {
885     return m_TagList.getValue( i, &s );
886 }
887 
findLabel(int i,std::string & s)888 bool GRXMLDoc::findLabel(int i, std::string &s )
889 {
890     return m_LabelList.getValue( i, &s );
891 }
892 
findSubGraphIndex(SubGraph * p_SubGraph,std::string & s)893 bool GRXMLDoc::findSubGraphIndex( SubGraph *p_SubGraph, std::string &s )
894 {
895     return m_SubgraphList.getIndex( p_SubGraph, &s );
896 }
897 
findRuleIndex(std::string s,int & i)898 bool GRXMLDoc::findRuleIndex( std::string s, int &i )
899 {
900     return m_RuleList.getValue( s, &i );
901 }
findTagIndex(std::string s,int & i)902 bool GRXMLDoc::findTagIndex( std::string s, int &i )
903 {
904     return m_TagList.getIndex( s, &i );
905 }
findLabelIndex(std::string s,int & i)906 bool GRXMLDoc::findLabelIndex( std::string s, int &i )
907 {
908     return m_LabelList.getIndex( s, &i );
909 }
findMeta(const std::string & sn,std::string & s)910 bool GRXMLDoc::findMeta(const std::string & sn, std::string &s)
911 {
912     return m_MetaKeyValPairs.getValue( sn, &s );
913 }
setMeta(const std::string & sn,const std::string & s)914 bool GRXMLDoc::setMeta(const std::string & sn, const std::string &s)
915 {
916   std::string tmp;
917   if(findMeta(sn,tmp))
918     m_MetaKeyValPairs.remove(sn);
919   return m_MetaKeyValPairs.insert(sn,s);
920 }
921 
addTagToList(std::string const & s)922 bool GRXMLDoc::addTagToList( std::string const& s )
923 {
924     bool success = true;
925     // Make values unique
926     int index;
927     if ( !findTagIndex( s, index ) )
928 	success = m_TagList.insert( m_TagAutoIndex++, s );
929     return success;
930 }
931 
932 
addLabelToList(std::string const & s)933 bool GRXMLDoc::addLabelToList( std::string const& s )
934 {
935   // TODO: Labels should be unique. Change key.
936   int index;
937   bool bRes = m_LabelList.getIndex( s, &index );
938   if(bRes == true) {
939     return false; // exists
940   }
941   bRes = m_LabelList.insert( m_LabelAutoIndex++, s );
942   return  bRes;
943 }
944 
printLists()945 void GRXMLDoc::printLists()
946 {
947     m_SubgraphList.print();
948     m_RuleList.print();
949     m_TagList.print();
950     m_LabelList.print();
951 }
952 
953 
printSubgraphs()954 void GRXMLDoc::printSubgraphs()
955 {
956     SubGraph *p_SubGraph;
957     std::string rule;
958     int index;
959     if ( m_RuleList.getFirst( &rule, &index) ) {
960 	if ( findSubGraph( rule, p_SubGraph ) ) {
961 	    DEBUG_PRINT("============ Rule: " + rule + "============");
962 	    printSubgraph( *p_SubGraph );
963 	    while ( m_RuleList.getNext( &rule, &index) ) {
964 		if ( findSubGraph( rule, p_SubGraph ) ) {
965 		    printSubgraph( *p_SubGraph );
966 		}
967 	    }
968 	}
969     }
970 }
971 
972 
printSubgraph(SubGraph & p_SubGraph)973 void GRXMLDoc::printSubgraph( SubGraph &p_SubGraph )
974 {
975     p_SubGraph.PrintWithLabels( *this );
976 }
977 
978 
getRuleRefName(XMLNode & node,std::string & ruleName)979 bool GRXMLDoc::getRuleRefName(XMLNode &node, std::string &ruleName)
980 {
981   const char* attr;
982   std::string s = GETATTR("uri" );
983   if (s.empty()) {
984     FATAL_ERROR( "ERROR! Ruleref specifies no uri name!", -1 );
985   }
986   // Remove the #:
987   int p1 = s.find("#");
988   if ( p1 !=0 ) {
989     FATAL_ERROR( "ERROR! bad ruleref name: '" + s + "'", -1 );
990   }
991   ruleName.assign( s, 1, s.size() );
992   return true;
993 }
994 
initializeLists()995 void GRXMLDoc::initializeLists()
996 {
997   m_SubgraphList.setName("Subgraphs");
998   m_RuleList.setName("Rules");
999   m_TagList.setName("Tags");
1000   m_LabelList.setName("Labels");
1001 
1002   /* Predefined rules. NB Labels are also created for each rule added.
1003   // The required order for these labels in the .map output file is:
1004   //     0   eps
1005   //     next come slots
1006   //     pau and pau2
1007   //     everything else
1008   // We will add all these now in case they are referenced and we will
1009   // reindex after we have parsed the grammar -- when we have the list
1010   // of slots. This re-indexing is for the output files .map and .P.txt.
1011   //
1012   */
1013     addLabelToList( "eps" );
1014 
1015     addLabelToList( "-pau-" );
1016     addLabelToList( "-pau2-" );
1017 }
1018 
writeMapFile(std::string & fileName)1019 void GRXMLDoc::writeMapFile( std::string & fileName )
1020 {
1021     // We need to re-index in order to put the labels in correct order:
1022     // 1. eps
1023     // 2. all slots
1024     // 3. all rules
1025     // 4. -pau- words
1026     // 5. remaining labels
1027     ofstream outfile;
1028     int index, origIndex;
1029     std::string label;
1030     std::string slotRuleName;
1031     std::string scope; // For rules
1032     HashMap<int,std::string> orderedList;
1033     int orderedIndex=0;
1034     // 1. eps
1035     orderedList.insert( orderedIndex++, "eps" );
1036 
1037     // 2. slots
1038     if ( m_LabelList.getFirst( &origIndex, &label ) ) {
1039 	if ( IsSlot( label ) ) {
1040 	    orderedList.insert( orderedIndex++, label );
1041 	}
1042 	while (m_LabelList.getNext( &origIndex, &label ) ) {
1043 	    if ( IsSlot( label ) ) {
1044 		orderedList.insert( orderedIndex++, label );
1045 	    }
1046 	}
1047     }
1048 
1049     // 3.  Now rules, or anything with @
1050     if ( m_LabelList.getFirst( &origIndex, &label ) ) {
1051 	do {
1052 #if GRXML_DEBUG
1053 	    std::cout << label << " "<< label.find_first_of ("@") << std::endl;
1054 #endif
1055             if (!IsSlot(label) && label.find_first_of ("@") != string::npos) {
1056 #if GRXML_DEBUG
1057 		std::cout << "    Adding " << label << std::endl;
1058 #endif
1059 		orderedList.insert( orderedIndex++, label );
1060 	    }
1061 	} while (m_LabelList.getNext( &origIndex, &label ) );
1062     }
1063 
1064     // 4. pau
1065     orderedList.insert( orderedIndex++, "-pau-" );
1066     orderedList.insert( orderedIndex++, "-pau2-" );
1067 
1068     // 5. Remaining stuff. NB We depend upon the label not
1069     //    being added twice.
1070     if ( m_LabelList.getFirst( &origIndex, &label ) ) {
1071 	if ( !orderedList.getIndex( label, &index ) ) {
1072 	  orderedList.insert( orderedIndex++, label );
1073 	}
1074 	while (m_LabelList.getNext( &origIndex, &label ) ) {
1075 	    if ( !orderedList.getIndex( label, &index ) ) {
1076 	      orderedList.insert( orderedIndex++, label );
1077 	    }
1078 	}
1079     }
1080     outfile.open ( fileName.c_str() );
1081 
1082     bool bRes = orderedList.getFirst( &index, &label );
1083     do {
1084       if(!bRes) break;
1085       // Look up scope using original index
1086       m_LabelList.getIndex( label, &origIndex );
1087       if (m_RuleScope.getValue(origIndex, &scope) )
1088 	label = scope + ":" + label;
1089       outfile << label << " " << index << std::endl;
1090       bRes = orderedList.getNext( &index, &label );
1091     } while(bRes);
1092 
1093     outfile.close();
1094 }
1095 
1096 
writeScriptFile(std::string & fileName)1097 void GRXMLDoc::writeScriptFile( std::string & fileName )
1098 {
1099     ofstream outfile;
1100     int index;
1101     std::string label;
1102     outfile.open ( fileName.c_str() );
1103     if ( m_TagList.getFirst( &index, &label ) ) {
1104     	outfile << index << " " << label << std::endl;
1105     }
1106     while (m_TagList.getNext( &index, &label ) ) {
1107     	outfile << index << " " << label << std::endl;
1108     }
1109     outfile.close();
1110 
1111     //m_LabelList.writeFile( fileName );
1112 }
1113 
writeParamsFile(std::string & fileName)1114 void GRXMLDoc::writeParamsFile( std::string & fileName )
1115 {
1116   std::string wtw;
1117   ofstream outfile;
1118   bool bRes;
1119 
1120   outfile.open(fileName.c_str());
1121 
1122   std::string metaname = "word_penalty";
1123   bRes = findMeta(metaname, wtw);
1124   if(bRes)
1125     outfile << metaname.c_str() << "\t=\t" << wtw.c_str() << std::endl;
1126 
1127   // outfile << "locale"  << "\t=\t" << m_XMLLanguage << std::endl;
1128   outfile.close();
1129 }
1130 
writeGraphFiles(std::string & prefix,bool bDoWriteRecogGraphs)1131 void GRXMLDoc::writeGraphFiles( std::string& prefix, bool bDoWriteRecogGraphs)
1132 {
1133     SubGraph *p_SubGraph;
1134     SubGraph *p_SemGraph;
1135     std::string fileName;
1136     if ( !findSubGraph( m_RootRule, p_SubGraph ) ) {
1137 	FATAL_ERROR ("ERROR: writeGraphFiles - no root rule "+ m_RootRule + " defined. No file created", -1 );
1138     }
1139 
1140     //  Create .P.txt
1141     printf ("\nCreating semantic graph file\n");
1142     p_SemGraph = new SubGraph( (char *) "Main", -1);
1143     m_pGraph->BeginRule( p_SemGraph );
1144     m_pGraph->AddRuleRef( p_SemGraph, p_SubGraph->getRuleId());
1145     m_pGraph->EndRule( p_SemGraph );
1146     m_pGraph->ExpandRules (p_SemGraph);
1147     p_SemGraph->RemoveInternalConnections ();
1148 
1149     p_SemGraph->AddTerminalConnections ();
1150     p_SemGraph->ReduceArcsByEquivalence();
1151     p_SemGraph->RemoveUnreachedConnections (-1, -1);
1152     p_SemGraph->DeterminizeArcs();
1153     p_SemGraph->RemoveUnreachedConnections (-1, -1);
1154     p_SemGraph->ReduceArcsByEquivalence();
1155     p_SemGraph->RemoveUnreachedConnections (-1, -1);
1156     fileName = prefix + ".P.txt";
1157     p_SemGraph->WriteForwardGraphWithSemantic( fileName, *this );
1158     delete p_SemGraph;
1159 
1160     fileName = prefix + ".omap";
1161     this->WriteOLabels(fileName);
1162 }
1163 
sortLabels()1164 void GRXMLDoc::sortLabels()
1165 {
1166     // We need to re-index in order to put the labels in correct order:
1167     int index=0, origIndex;
1168     std::string label;
1169     std::string slotRuleName;
1170     std::string scope; // For rules
1171     std::vector <std::string> orderedList;
1172     if ( m_LabelList.getFirst( &origIndex, &label ) ) {
1173         // Look up scope using original index
1174         orderedList.push_back( label );
1175         while (m_LabelList.getNext( &origIndex, &label ) ) {
1176             orderedList.push_back( label );
1177         }
1178     }
1179     std::sort(orderedList.begin(), orderedList.end() );
1180     m_SortedLabelList.clear();
1181     index=0;
1182     for (std::vector<std::string>::const_iterator citer = orderedList.begin();
1183      citer != orderedList.end(); ++citer) {
1184         label = *citer;
1185         m_LabelList.getIndex( label, &origIndex );
1186         m_SortedLabelList.insert( index, label );
1187         index++;
1188         // std::cout <<"Sorted: " << index <<" " << label <<std::endl;
1189     }
1190     return;
1191 }
1192 
findSortedLabel(int i,std::string & s)1193 bool GRXMLDoc::findSortedLabel(int i, std::string &s )
1194 {
1195     if (m_SortedLabelList.isEmpty() ) {
1196         sortLabels(); // Create the sorted label list.
1197     }
1198     return m_SortedLabelList.getValue( i, &s );
1199 }
1200 
findSortedLabelIndex(int i,int & sortedIndex)1201 bool GRXMLDoc::findSortedLabelIndex( int i, int &sortedIndex )
1202 {
1203     std::string s;
1204     if (m_SortedLabelList.isEmpty() ) {
1205         sortLabels(); // Create the sorted label list.
1206     }
1207     if ( m_LabelList.getValue( i, &s ) ) {
1208         if ( m_SortedLabelList.getIndex(s, &sortedIndex )) {
1209             return true;
1210         }
1211     }
1212     return false;
1213 }
1214 
addOLabelToOList(std::string & s)1215 void GRXMLDoc::addOLabelToOList( std::string &s)
1216 {
1217     m_OutputPtxtLabels.insert( s, 0);
1218 }
1219 
WriteOLabels(const std::string & fileName)1220 bool GRXMLDoc::WriteOLabels(const std::string& fileName)
1221 {
1222   HashMap<int,std::string> invMap;
1223   int count = 0;
1224   int max_script_label = 0;
1225   int scriptID = 0;
1226   std::map<std::string, int>::iterator iter;
1227   bool bFound;
1228   int tmp;
1229 
1230   std::string strIndex = "eps";
1231   bFound = m_OutputPtxtLabels.getValue(strIndex, &tmp);
1232   if(bFound)
1233     m_OutputPtxtLabels.remove(strIndex);
1234   m_OutputPtxtLabels.insert(strIndex, count);
1235   invMap.insert( count, strIndex);
1236   count++;
1237 
1238   strIndex = "{";
1239   bFound = m_OutputPtxtLabels.getValue(strIndex, &tmp);
1240   if(bFound)
1241     m_OutputPtxtLabels.remove(strIndex);
1242   m_OutputPtxtLabels.insert(strIndex, count);
1243   invMap.insert( count, strIndex);
1244   count++;
1245 
1246   iter = m_OutputPtxtLabels.begin();
1247   for( ; iter!=m_OutputPtxtLabels.end(); iter++) {
1248     const char* label = iter->first.c_str();
1249     if( !strncmp(label,SCRIPT_LABEL_PREFIX, SCRIPT_LABEL_PREFIX_LEN)
1250 	&& strspn(label+SCRIPT_LABEL_PREFIX_LEN,"0123456789")==strlen(label+SCRIPT_LABEL_PREFIX_LEN) ) {
1251       scriptID = atoi(label+SCRIPT_LABEL_PREFIX_LEN);
1252       if(max_script_label < scriptID)
1253 	max_script_label = scriptID;
1254     }/* else if( !strncmp(label,SCRIPT_LABEL_PREFIX, SCRIPT_LABEL_PREFIX_LEN)) {
1255       invMap.insert(count, iter->first);
1256       iter->second = count;
1257       count++;
1258       }*/
1259     else if(!invMap.getIndex((iter->first), &tmp)){
1260       invMap.insert(count, iter->first);
1261       iter->second = count;
1262       count++;
1263     }
1264   }
1265 
1266   cout << "found max_script_label " << max_script_label << endl;
1267   for(int j=0; j<=max_script_label; j++) {
1268     std::stringstream ss;
1269     ss << SCRIPT_LABEL_PREFIX << j;
1270     if(!invMap.getIndex( ss.str(), &tmp)) {
1271       invMap.insert( count++, ss.str());
1272     }
1273   }
1274 
1275   std::ofstream outfile(fileName.c_str());
1276   std::string outscript;
1277   if(!outfile) {
1278     FATAL_ERROR( "Error: opening the omap file for output", 1);
1279     WARNING( "Error: opening the omap file for output");
1280     return 1;
1281   }
1282   for(int i=0; i<count; i++) {
1283     outscript = "";
1284     invMap.getValue(i,&outscript);
1285     if(outscript.length() == 0) {
1286       cout << "error: internal error while making .omap " << i << endl;
1287       FATAL_ERROR("error",1);
1288     }
1289     outfile << outscript.c_str() << " " << i << std::endl;
1290   }
1291   outfile.close();
1292   return 0;
1293 }
1294