1 /* FILE: sub_grph.h 2 * DATE MODIFIED: 31-Aug-07 3 * DESCRIPTION: Part of the SREC graph compiler project source files. 4 * 5 * Copyright 2007, 2008 Nuance Communciations, Inc. * 6 * * 7 * Licensed under the Apache License, Version 2.0 (the 'License'); * 8 * you may not use this file except in compliance with the License. * 9 * * 10 * You may obtain a copy of the License at * 11 * http://www.apache.org/licenses/LICENSE-2.0 * 12 * * 13 * Unless required by applicable law or agreed to in writing, software * 14 * distributed under the License is distributed on an 'AS IS' BASIS, * 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 16 * See the License for the specific language governing permissions and * 17 * limitations under the License. * 18 * * 19 *---------------------------------------------------------------------------*/ 20 21 #ifndef __sub_grph_h__ 22 #define __sub_grph_h__ 23 24 #include "netw_arc.h" 25 26 #include "vocab.h" 27 28 // TODO: 29 // Guards various stages 1. Compiling 2. Processing 30 31 #define SCOPE_ROOT 0 32 #define SCOPE_ITEM -1 33 #define SCOPE_RULE -2 34 #define SCOPE_ONEOF -3 35 #define SCOPE_COUNT -4 36 #define SCOPE_OPT -5 37 #define SCOPE_REPEAT -6 38 39 #define NONE_LABEL -100000 // Null transition 40 #define TAG_LABEL -100001 // Tag transition 41 #define WB_LABEL -100002 // Word boundary transition 42 #define TERMINAL_LABEL -100003 // Terminal transition 43 #define BEGINSCOPE_LABEL -100004 // Start of scope 44 #define ENDSCOPE_LABEL -100005 // End of scope 45 #define BEGINRULE_LABEL -100006 // Start of rule 46 #define ENDRULE_LABEL -100007 // End of rule 47 #define DISCARD_LABEL -100010 // Discard (internal) 48 #define INITIAL_LABEL -100011 // Initial silence 49 #define FINAL_LABEL -100012 // Final silence 50 51 #define MAXNUM 100000 // A large number 52 #define BLKSIZE 10000 // Buffer size 53 #define MAXITS 5 // Maximum equivalence reduction iterations 54 55 // #define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) 56 // #define MIN(X,Y) ((X) > (Y) ? (X) : (Y)) 57 58 // General functions 59 bool IsSlot (std::string label); 60 61 62 class DetCache { 63 public: 64 DetCache()65 DetCache () { num= 0; dataPack= 0; }; 66 AddEntry(int comb,int first,int second)67 void AddEntry (int comb, int first, int second) { 68 if (num == 0) 69 dataPack= new int [3*BLKSIZE]; 70 else if ((num%BLKSIZE) == 0) { 71 int *newPack = new int [num+3*BLKSIZE]; 72 for (int ii= 0; ii < (3*num); ii++) 73 newPack[ii]= dataPack[ii]; 74 delete [] dataPack; 75 dataPack= newPack; 76 } 77 dataPack[3*num]= first; 78 dataPack[3*num+1]= second; 79 dataPack[3*num+2]= comb; 80 num++; 81 return; 82 }; 83 QueryEntry(int first,int second)84 int QueryEntry (int first, int second) { 85 if (!dataPack) 86 return -1; 87 for (int ii= 0; ii < num; ii++) 88 if (dataPack[3*ii] == first && dataPack[3*ii+1] == second) 89 return (dataPack[3*ii+2]); 90 return -1; 91 } 92 ~DetCache()93 ~DetCache () { if (dataPack) delete [] dataPack; }; 94 95 private: 96 int num; 97 int *dataPack; 98 }; 99 100 101 class SubGraph 102 { 103 public: 104 SubGraph(char * name,int ruleNo)105 SubGraph (char *name, int ruleNo) 106 { 107 int count= strlen(name); 108 title= new char [count+1]; 109 strcpy (title, name); 110 // printf ("New Rule %s\n", title); 111 ruleId= ruleNo; 112 popOp= 0; 113 numArc= 0; 114 numVertex= 0; 115 lastScope= SCOPE_ROOT; 116 startId= NewVertexId(); 117 lastId= startId; 118 endId= lastId; 119 forwardList= 0; 120 backwardList= 0; 121 sortNum= 0; 122 vertexProp= 0; 123 sizeProp= 0; 124 return; 125 }; 126 127 SubGraph (SubGraph *baseG); 128 ~SubGraph()129 ~SubGraph () 130 { 131 delete [] title; 132 for (int ii= 0; ii < numArc; ii++) 133 delete arc[ii]; 134 if (numArc >0) 135 delete [] arc; 136 if (forwardList) 137 delete [] forwardList; 138 if (backwardList) 139 delete [] backwardList; 140 return; 141 } 142 getName(char * name,int maxlen)143 void getName (char *name, int maxlen) 144 { 145 strncpy (name, title, maxlen); 146 } 147 getRuleId()148 int getRuleId () { 149 return ruleId; 150 } 151 152 /** Creates a copy of the graph */ 153 void CopyFastArcs (SubGraph *subG, int startLoc, int endLoc, int offset, int headId, int newHeadId, int tailId, int newTailId); 154 155 /** Graph construction routines */ 156 void BeginScope (int scopeType, int arg1, int arg2); 157 void EndScope (); 158 int AddItem (int inputLabel, int tagLabel); 159 int AddTag (int tagLabel); 160 void ExpandRules (SubGraph **subList, int *lookupList, int numSubs); 161 162 /** Reverse subgraph */ 163 void ReverseGraphForOutput (); 164 void SortLanguage (); 165 void SortLanguageForMin (); 166 void SortLanguageReverse (); 167 void UpdateSortForLanguage (); 168 169 void RemoveRuleConnections (); 170 void RemoveInternalConnections (); 171 void RemoveUnreachedConnections (int startPoint, int endPoint); 172 void RemoveUnreachedConnectionsDebug (int startPoint, int endPoint); 173 void RemoveTagConnections (int startPoint, int endPoint); 174 void AddTerminalConnections (); 175 176 void Print (); 177 void PrintText (); 178 void PrintWithLabels( GRXMLDoc &p_Doc ); 179 void RemapForSortedOutput ( GRXMLDoc & doc ); 180 181 void WriteForwardGraphWithSemantic ( std::string & fileName, GRXMLDoc &p_Doc ); 182 void WriteForwardGraphFile ( std::string & fileName, GRXMLDoc &p_Doc ); 183 void WriteFile ( std::string & fileName, GRXMLDoc &p_Doc ); 184 void WritePhonemeGraphFile( std::string & fileName, GRXMLDoc &p_Doc ); 185 void WriteHMMGraphFile( std::string & fileName, GRXMLDoc &p_Doc ); 186 187 void DeterminizeArcs (); 188 int IsDeterminized (int currId); 189 void ReduceArcsByEquivalence (); 190 191 void ExpandPhonemes ( GRXMLDoc & doc ); 192 void AddLeftContexts (); 193 void AddRightContexts (); 194 void ExpandToHMMs ( GRXMLDoc & doc ); 195 void ExpandIntraWordSilence ( GRXMLDoc & doc ); 196 void ClearOutputs (); 197 void ShiftOutputsToLeft (); 198 void FinalProcessing ( GRXMLDoc & doc ); 199 200 void DebugPrintDirective (char *dirrData); 201 void DebugPrintLabel (int labId); 202 203 private: 204 205 int NewVertexId (); 206 int numVertex; 207 208 void AllocateSpaceForArc (); 209 NUANArc *CreateArc (int iLabel, int oLabel, int from, int to); 210 NUANArc *InheritArc (NUANArc *arcBase, int id); 211 NUANArc *InheritReverseArc (NUANArc *arcBase, int id); 212 NUANArc *CreateCopyWithOutput (NUANArc *arcBase, int id); 213 NUANArc *InheritReverseArcWithTag (NUANArc *arcBase, int id, int tagId); 214 int numArc; 215 NUANArc **arc; 216 int sortNum; 217 int sortRevNum; 218 int *forwardList; 219 int *backwardList; 220 221 int ConnectLastScope (int beginScopeId, int endScopeId); 222 int CloseScope (); 223 void UpdateVertexCount (int startLoc); 224 225 int FindFromIndex (int element); 226 int FindToIndex (int element); 227 void PullUpBegins (int currId, int baseId, int endId, int procLabel, int *nodeList, int currNum, int maxNum); 228 void ProcessBegins (int currId, int endId, int procLabel, int *nodeList, int currNum, int *visitMark, int maxNum); 229 void PullUpEnds (int currId, int baseId, int initialId, int procLabel, int *nodeList, int currNum, int maxNum); 230 void ProcessEnds (int currId, int initialId, int procLabel, int *nodeList, int currNum, int *visitMark, int maxNum); 231 bool PullUpTags (int currId, int baseId, int initialId, int outTag, int *nodeList, int currNum, int maxNum); 232 void ProcessTags (int currId, int initialId, int *nodeList, int currNum, int *visitMark, int maxNum); 233 void ClearDuplicateArcs (); 234 235 void RemoveRuleStarts (int startId, int endId); 236 void RemoveRuleEnds (int startId, int endId); 237 void RemoveNulls (int startPoint, int endPoint); 238 void RemoveForwardConnections (int startId, int endId); 239 void RemoveBackwardConnections (int startId, int endId); 240 void ClearLabelledConnections (int labItem); 241 void RemoveUnvisitedArcs (int initialId, int finalId); 242 void RemoveMarkedNodes (int *forwardDepth, int *reverseDepth); 243 void RemoveDiscardedArcs (); 244 void RenumberNodes (); 245 246 bool EquivalenceTestForward (int firstId, int secondId, int *equivMap); 247 void ForwardDepthData (int startId, int *depthMap, int depth); 248 void ReverseDepthData (int startId, int *depthMap, int depth); 249 void ReverseDepthOnTerminal (int *depthMap); 250 251 void MapGraphVertices (int *equivMap); 252 void IdentifyEquivalence (int *depthMap, int *equivMap); 253 void CheckForChangeAndResort (int currId, int *mapList); 254 255 void SortLanguageAtIndices (int begIx, int endIx); 256 void SortLanguageAtIndicesForMin (int begIx, int endIx); 257 void SortLanguageAtSortIndices (int begIx, int endIx); 258 bool CheckSort (); 259 bool CheckSortReverse (); 260 void RemoveDuplicatesAtNode (int rixBegin, int rixEnd); 261 void MergeVertices (int newId, int firstId, int secondId); 262 void DeterminizeAtVertex (int baseId, DetCache *cache); 263 int PairwiseDeterminize (int baseId, int firstId, int fixStart, int secondId, int sixStart, DetCache *cache); 264 void ClearRuleIds (); 265 void ViewNode (int currId); 266 267 void ReverseMarkArcs (); 268 void ReverseMarkOutput (int currId, int initialId, int outId); 269 void MarkNodesByOutputAndClearArcs (); 270 void ExpandWordBoundaries ( GRXMLDoc & doc ); 271 void AddInitialFinalSilences ( GRXMLDoc & doc ); 272 273 void UpdateVisitationCache (int startNo); 274 void ClearVisitationCache (); 275 void SetupVisitationCache (); 276 void DecVisitationCache (int currId); 277 void IncVisitationCache (int currId); 278 int VisitationConsistencyCheck (); 279 CreateNodeProperty()280 void CreateNodeProperty () { 281 vertexProp= new int [BLKSIZE]; 282 vertexMinned= new bool [BLKSIZE]; 283 sizeProp= BLKSIZE; 284 for (int ii= 0; ii < sizeProp; ii++) { 285 vertexProp[ii]= 0; 286 vertexMinned[ii]= false; 287 } 288 return; 289 } 290 IncNodeProperty(int vertNo)291 void IncNodeProperty (int vertNo) { 292 if (vertNo >= sizeProp) { 293 int newSize= (vertNo/BLKSIZE + 1)*BLKSIZE; 294 int *newPack = new int [newSize]; 295 bool *newMinned = new bool [newSize]; 296 int ii; 297 for (ii= 0; ii < sizeProp; ii++) { 298 newPack[ii]= vertexProp[ii]; 299 newMinned[ii]= vertexMinned[ii]; 300 } 301 for (ii= sizeProp; ii < newSize; ii++) { 302 newPack[ii]= 0; 303 newMinned[ii]= false; 304 } 305 delete [] vertexProp; 306 delete [] vertexMinned; 307 vertexProp= newPack; 308 vertexMinned= newMinned; 309 sizeProp= newSize; 310 } 311 vertexProp[vertNo]++; 312 return; 313 } 314 DecNodeProperty(int vertNo)315 void DecNodeProperty (int vertNo) { 316 if (vertNo < sizeProp) 317 vertexProp[vertNo]--; 318 return; 319 } 320 SetNodeProperty(int vertNo,int value)321 void SetNodeProperty (int vertNo, int value) { 322 if (vertNo < sizeProp) 323 vertexProp[vertNo]= value; 324 return; 325 } 326 SetMinProperty(int vertNo)327 void SetMinProperty (int vertNo) { 328 if (vertNo < sizeProp) 329 vertexMinned[vertNo]= true; 330 return; 331 } 332 QueryNodeProperty(int vertNo)333 int QueryNodeProperty (int vertNo) { 334 if (vertNo < sizeProp) 335 return (vertexProp[vertNo]); 336 return -1; 337 } 338 QueryMinProperty(int vertNo)339 int QueryMinProperty (int vertNo) { 340 if (vertNo < sizeProp) 341 return (vertexMinned[vertNo]); 342 return false; 343 } 344 DeleteNodeProperty()345 void DeleteNodeProperty () { 346 delete [] vertexProp; 347 delete [] vertexMinned; 348 vertexProp= 0; 349 vertexMinned= 0; 350 } 351 352 353 /* Stacking of operations and related data 354 */ 355 char *title; 356 int ruleId; 357 int lastScope; 358 int startId; 359 int endId; 360 int prevStartId; 361 int prevEndId; 362 int lastId; 363 int arg1; 364 int arg2; 365 int arcLoc; 366 int opStack[100000]; // TODO: remove hard coded number 367 int popOp; 368 void pushScope (); 369 void popScope (); 370 int silenceId; 371 int intraId; 372 int *vertexProp; // Vertex properties 373 bool *vertexMinned; // Vertex properties 374 int sizeProp; 375 }; 376 377 #endif // __sub_grph_h__ 378