• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  grxmldoc.h  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 
21 #ifndef __grxmldoc_h__
22 #define  __grxmldoc_h__
23 
24 // #define MEMTRACE // Uses mtrace() to detect leaks
25 
26 #include "hashmap.h"
27 #include "tinyxml.h"
28 #include <stack>
29 #include "vocab.h"
30 
// String prefix used for script labels (per the macro name) and its length
// in characters. The length is computed from the literal itself (sizeof
// includes the terminating NUL, hence the -1) so the two values cannot drift
// apart; the cast keeps the type 'int', as with the previous literal 1.
#define SCRIPT_LABEL_PREFIX "_"
#define SCRIPT_LABEL_PREFIX_LEN (static_cast<int>(sizeof(SCRIPT_LABEL_PREFIX) - 1))

// Forward declarations of types referenced by the declarations below.
// NOTE(review): HashMap is used by value as a member type further down, so a
// full definition must come from one of the includes above (presumably
// hashmap.h) -- confirm.
class Node;
template <typename T1, typename T2> class HashMap;
class Graph;
class SubGraph;
37 
38 class GRXMLDoc
39 {
40 public:
41     typedef TiXmlNode XMLNode;
42     // Some convenience items for string comparison
43     typedef enum KeywordValues {NodeTypeGrammar, NodeTypeRule, NodeTypeRuleReference, NodeTypeOneOf, NodeTypeItem, NodeTypeTag, NodeTypeCount, NodeTypeMeta, NodeTypeBadValue};
44     typedef  std::map<std::string, KeywordValues> KEYWDPAIR;
45 
46     typedef struct {
47 	bool hasRuleRef;
48 	std::string RuleRefName;
49 	int tagID;
50     } ItemData;
51 
52     GRXMLDoc();
53     ~GRXMLDoc();
54 
55     // Optional use of voc and model files
56    // TODO: Rearrange access to voc and models
57 #ifndef OPENFSTSDK
58     void initialize_SR(char* parfile);
59     void shutdown_SR();
getVocabulary()60     Vocabulary *getVocabulary() { return m_pVocab;}
getModel()61     AcousticModel* getModel() { return m_pModel;}
62     int addPhonemeToList( std::string const& s );
63     bool findPhoneme( int i, std::string & s );
64     bool getHMMSequence (int centre, int left, int right, std::vector<int> & modelSequence);
65 #endif
66 
67     //  Lookup functions
68     bool findSubGraph(std::string & s, SubGraph *&p_SubGraph);
69     bool findRule(int i, std::string &s );
70     bool findTag(int i, std::string &s );
71     bool findLabel(int i, std::string &s );
72     bool findSubGraphIndex( SubGraph *p_SubGraph, std::string &s );
73     bool findRuleIndex( std::string s, int &i );
74     bool findTagIndex( std::string s, int &i );
75     bool findLabelIndex( std::string s, int &i );
76     bool findSortedLabel(int i, std::string &s );
77     bool findSortedLabelIndex( int i, int &sortedIndex );
78     bool findMeta(const std::string & sn, std::string &s);
79     bool setMeta(const std::string & sn, const std::string &s);
80     void sortLabels();
81     void addOLabelToOList( std::string & s);
82     bool WriteOLabels(const std::string& fileName);
83 
84     // Take DOM object and create word graph. Creates SubGraph, rule, tag and label lists.
85     bool parseGrammar( XMLNode &node, std::string & xMLFileName );
86 
87     // Generate output files
88     void writeMapFile( std::string & fileName );
89     void writeScriptFile( std::string & fileName );
90     void writeGraphFiles( std::string & fileName, bool bDoWriteRecogGraphs );
91     void writeParamsFile( std::string & fileName );
92     void printLists();
93     void printSubgraphs();
94 
95 protected:
96     void initializeLists();
97     bool parseNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
98     bool beginNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
99     bool endNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
100     bool beginParseGrammarNode( XMLNode &node );
101     bool endParseGrammarNode( XMLNode &node );
102     bool beginParseMetaNode( XMLNode &node );
103     bool endParseMetaNode( XMLNode &node );
104     bool beginParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph);
105     bool endParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph );
106     bool beginItem( XMLNode &node, SubGraph *&p_SubGraph );
107     bool endItem( XMLNode &node, SubGraph *&p_SubGraph );
108     bool processCDATA( XMLNode &node, SubGraph *&p_SubGraph );
109     bool beginOneOf( XMLNode &node, SubGraph *&p_SubGraph );
110     bool endOneOf( XMLNode &node, SubGraph *&p_SubGraph );
111     bool beginRuleRef( XMLNode &grmNode, SubGraph *&p_SubGraph );
112     bool endRuleRef(XMLNode &node, SubGraph *&p_SubGraph );
113     bool fixRuleRef( SubGraph *&p_SubGraph );
114     bool getRuleRefName(XMLNode &node, std::string &ruleName);
115     bool extendAltExpression( XMLNode &node, int level );
116     bool beginTag( XMLNode &node, SubGraph *&p_SubGraph );
117     bool endTag( XMLNode &node, SubGraph *&p_SubGraph );
118     bool beginCount( XMLNode &node, SubGraph *&p_SubGraph );
119     bool endCount( XMLNode &node, SubGraph *&p_SubGraph );
120     void printNode( XMLNode &node, int level );
121     bool addRuleToList(std::string const& ruleName, SubGraph *&p_SubGraph);
122 
123     bool deleteRules();
124     bool addTagToList( std::string const& s );
125     bool addLabelToList( std::string const& s );
126     void printSubgraph( SubGraph &p_SubGraph );
127 
128 private:
129 
130     Graph *m_pGraph;	// The top-level container object for the word graph;
131     KEYWDPAIR  m_NodeKeyWords;
132     // The unique attributes of the GRML doc
133     std::string m_XMLMode;
134     std::string m_XMLLanguage;
135     std::string m_RootRule;
136     std::string m_XMLTagFormat;
137     std::string m_XMLVersion;
138     std::string m_XMLBase;
139     std::string m_XMLFileName;
140 
141     //  We store indices for all labels used in the word graph.
142     //  Store all these labels in the m_LabelList table, which is auto-indexed.
143     //  We need a list of the rule names so that we can distinguish them from other labels.
144     //  Store these rule names in the m_RuleList table with an index equal to the label index for the rule.
145     //  Thus, when we need the index of a rule, we go straight to m_RuleList
146     //	and when we need the label of a rule or any other item we use m_LabelList.
147 
148     HashMap<std::string,SubGraph*> m_SubgraphList;
149     HashMap<int,std::string> m_TagList;	// <item tag = ...
150     HashMap<int,std::string> m_LabelList; // Stores all network label IDs, including rule names
151     HashMap<int,std::string> m_SortedLabelList; // Used to sort the labels fo
152     HashMap<int, std::string> m_PhonemeList;    // Stores triphones
153     HashMap<std::string,int> m_RuleList; // Stores rule name and index used in the LabelList. Use to distinguish which are rules.
154     HashMap<int, std::string> m_RuleScope;
155     HashMap<int, std::string> m_SlotList;
156     HashMap<std::string, std::string> m_MetaKeyValPairs; //Store word-penalty value
157     HashMap<std::string, int> m_OutputPtxtLabels;
158 
159     std::stack<ItemData*> m_ItemVarsStack;
160     std::stack<std::string> m_RuleListStack;
161     int m_RuleAutoIndex;
162     int m_TagAutoIndex;
163     int m_LabelAutoIndex;
164     int m_PhonemeAutoIndex;
165     int m_ExpandedRulesAutoIndex;
166     int m_TagID; // Use to stash tag index for items.
167     // Note the subgraph list does not have an auto-index as it is string-indexed.
168     // All these lists also have an internal numeric index which can be used.
169 
170 #ifndef OPENFSTSDK
171     Vocabulary *m_pVocab;
172     AcousticModel *m_pModel;
173 #endif
174 
175 };
176 
177 #endif // __grxmldoc_h__
178 
179 
180 
181