1 /*---------------------------------------------------------------------------* 2 * grxmldoc.h * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 21 #ifndef __grxmldoc_h__ 22 #define __grxmldoc_h__ 23 24 // #define MEMTRACE // Uses mtrace() to detect leaks 25 26 #include "hashmap.h" 27 #include "tinyxml.h" 28 #include <stack> 29 #include "vocab.h" 30 31 #define SCRIPT_LABEL_PREFIX "_" 32 #define SCRIPT_LABEL_PREFIX_LEN 1 33 class Node; 34 template <typename T1, typename T2> class HashMap; 35 class Graph; 36 class SubGraph; 37 38 class GRXMLDoc 39 { 40 public: 41 typedef TiXmlNode XMLNode; 42 // Some convenience items for string comparison 43 enum KeywordValues {NodeTypeGrammar, NodeTypeRule, NodeTypeRuleReference, NodeTypeOneOf, NodeTypeItem, NodeTypeTag, NodeTypeCount, NodeTypeMeta, NodeTypeBadValue}; 44 typedef std::map<std::string, KeywordValues> KEYWDPAIR; 45 46 typedef struct { 47 bool hasRuleRef; 48 std::string RuleRefName; 49 int tagID; 50 } ItemData; 51 52 GRXMLDoc(); 53 ~GRXMLDoc(); 54 55 // Optional use of voc and model files 56 // TODO: Rearrange access to voc and models 57 #ifndef OPENFSTSDK 58 void initialize_SR(char* parfile); 59 void shutdown_SR(); getVocabulary()60 Vocabulary *getVocabulary() { return m_pVocab;} getModel()61 AcousticModel* getModel() { return m_pModel;} 62 int addPhonemeToList( std::string const& s ); 63 bool findPhoneme( int i, std::string & s ); 64 bool getHMMSequence (int centre, int left, int right, std::vector<int> & modelSequence); 65 #endif 66 67 // Lookup functions 68 bool findSubGraph(std::string & s, SubGraph *&p_SubGraph); 69 bool findRule(int i, std::string &s ); 70 bool findTag(int i, std::string &s ); 71 bool findLabel(int i, std::string &s ); 72 bool findSubGraphIndex( SubGraph *p_SubGraph, std::string &s ); 73 bool findRuleIndex( std::string s, int &i ); 74 bool findTagIndex( std::string s, int &i ); 75 bool findLabelIndex( std::string s, int &i ); 76 bool findSortedLabel(int i, std::string &s ); 77 bool findSortedLabelIndex( int i, int &sortedIndex ); 78 bool findMeta(const std::string & sn, std::string &s); 79 bool setMeta(const std::string & sn, const std::string &s); 80 void sortLabels(); 81 void addOLabelToOList( std::string & s); 82 bool WriteOLabels(const std::string& fileName); 83 84 // Take DOM object and create word graph. Creates SubGraph, rule, tag and label lists. 85 bool parseGrammar( XMLNode &node, std::string & xMLFileName ); 86 87 // Generate output files 88 void writeMapFile( std::string & fileName ); 89 void writeScriptFile( std::string & fileName ); 90 void writeGraphFiles( std::string & fileName, bool bDoWriteRecogGraphs ); 91 void writeParamsFile( std::string & fileName ); 92 void printLists(); 93 void printSubgraphs(); 94 95 protected: 96 void initializeLists(); 97 bool parseNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level ); 98 bool beginNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level ); 99 bool endNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level ); 100 bool beginParseGrammarNode( XMLNode &node ); 101 bool endParseGrammarNode( XMLNode &node ); 102 bool beginParseMetaNode( XMLNode &node ); 103 bool endParseMetaNode( XMLNode &node ); 104 bool beginParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph); 105 bool endParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph ); 106 bool beginItem( XMLNode &node, SubGraph *&p_SubGraph ); 107 bool endItem( XMLNode &node, SubGraph *&p_SubGraph ); 108 bool processCDATA( XMLNode &node, SubGraph *&p_SubGraph ); 109 bool beginOneOf( XMLNode &node, SubGraph *&p_SubGraph ); 110 bool endOneOf( XMLNode &node, SubGraph *&p_SubGraph ); 111 bool beginRuleRef( XMLNode &grmNode, SubGraph *&p_SubGraph ); 112 bool endRuleRef(XMLNode &node, SubGraph *&p_SubGraph ); 113 bool fixRuleRef( SubGraph *&p_SubGraph ); 114 bool getRuleRefName(XMLNode &node, std::string &ruleName); 115 bool extendAltExpression( XMLNode &node, int level ); 116 bool beginTag( XMLNode &node, SubGraph *&p_SubGraph ); 117 bool endTag( XMLNode &node, SubGraph *&p_SubGraph ); 118 bool beginCount( XMLNode &node, SubGraph *&p_SubGraph ); 119 bool endCount( XMLNode &node, SubGraph *&p_SubGraph ); 120 void printNode( XMLNode &node, int level ); 121 bool addRuleToList(std::string const& ruleName, SubGraph *&p_SubGraph); 122 123 bool deleteRules(); 124 bool addTagToList( std::string const& s ); 125 bool addLabelToList( std::string const& s ); 126 void printSubgraph( SubGraph &p_SubGraph ); 127 128 private: 129 130 Graph *m_pGraph; // The top-level container object for the word graph; 131 KEYWDPAIR m_NodeKeyWords; 132 // The unique attributes of the GRML doc 133 std::string m_XMLMode; 134 std::string m_XMLLanguage; 135 std::string m_RootRule; 136 std::string m_XMLTagFormat; 137 std::string m_XMLVersion; 138 std::string m_XMLBase; 139 std::string m_XMLFileName; 140 141 // We store indices for all labels used in the word graph. 142 // Store all these labels in the m_LabelList table, which is auto-indexed. 143 // We need a list of the rule names so that we can distinguish them from other labels. 144 // Store these rule names in the m_RuleList table with an index equal to the label index for the rule. 145 // Thus, when we need the index of a rule, we go straight to m_RuleList 146 // and when we need the label of a rule or any other item we use m_LabelList. 147 148 HashMap<std::string,SubGraph*> m_SubgraphList; 149 HashMap<int,std::string> m_TagList; // <item tag = ... 150 HashMap<int,std::string> m_LabelList; // Stores all network label IDs, including rule names 151 HashMap<int,std::string> m_SortedLabelList; // Used to sort the labels fo 152 HashMap<int, std::string> m_PhonemeList; // Stores triphones 153 HashMap<std::string,int> m_RuleList; // Stores rule name and index used in the LabelList. Use to distinguish which are rules. 154 HashMap<int, std::string> m_RuleScope; 155 HashMap<int, std::string> m_SlotList; 156 HashMap<std::string, std::string> m_MetaKeyValPairs; //Store word-penalty value 157 HashMap<std::string, int> m_OutputPtxtLabels; 158 159 std::stack<ItemData*> m_ItemVarsStack; 160 std::stack<std::string> m_RuleListStack; 161 int m_RuleAutoIndex; 162 int m_TagAutoIndex; 163 int m_LabelAutoIndex; 164 int m_PhonemeAutoIndex; 165 int m_ExpandedRulesAutoIndex; 166 int m_TagID; // Use to stash tag index for items. 167 // Note the subgraph list does not have an auto-index as it is string-indexed. 168 // All these lists also have an internal numeric index which can be used. 169 170 #ifndef OPENFSTSDK 171 Vocabulary *m_pVocab; 172 AcousticModel *m_pModel; 173 #endif 174 175 }; 176 177 #endif // __grxmldoc_h__ 178 179 180 181