1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */package org.antlr.tool; 28 29 import org.antlr.analysis.Label; 30 import org.antlr.analysis.NFAState; 31 import org.antlr.grammar.v3.ANTLRParser; 32 import org.antlr.grammar.v3.AssignTokenTypesWalker; 33 import org.antlr.misc.Utils; 34 import org.antlr.runtime.RecognitionException; 35 import org.antlr.runtime.tree.CommonTreeNodeStream; 36 37 import java.util.*; 38 39 /** A tree of component (delegate) grammars. 40 * 41 * Rules defined in delegates are "inherited" like multi-inheritance 42 * so you can override them. All token types must be consistent across 43 * rules from all delegate grammars, so they must be stored here in one 44 * central place. 45 * 46 * We have to start out assuming a composite grammar situation as we can't 47 * look into the grammar files a priori to see if there is a delegate 48 * statement. Because of this, and to avoid duplicating token type tracking 49 * in each grammar, even single noncomposite grammars use one of these objects 50 * to track token types. 51 */ 52 public class CompositeGrammar { 53 public static final int MIN_RULE_INDEX = 1; 54 55 public CompositeGrammarTree delegateGrammarTreeRoot; 56 57 /** Used during getRuleReferenceClosure to detect computation cycles */ 58 protected Set<NFAState> refClosureBusy = new HashSet<NFAState>(); 59 60 /** Used to assign state numbers; all grammars in composite share common 61 * NFA space. This NFA tracks state numbers number to state mapping. 62 */ 63 public int stateCounter = 0; 64 65 /** The NFA states in the NFA built from rules across grammars in composite. 66 * Maps state number to NFAState object. 67 * This is a Vector instead of a List because I need to be able to grow 68 * this properly. After talking to Josh Bloch, Collections guy at Sun, 69 * I decided this was easiest solution. 70 */ 71 protected Vector<NFAState> numberToStateList = new Vector<NFAState>(1000); 72 73 /** Token names and literal tokens like "void" are uniquely indexed. 74 * with -1 implying EOF. Characters are different; they go from 75 * -1 (EOF) to \uFFFE. For example, 0 could be a binary byte you 76 * want to lexer. Labels of DFA/NFA transitions can be both tokens 77 * and characters. I use negative numbers for bookkeeping labels 78 * like EPSILON. Char/String literals and token types overlap in the same 79 * space, however. 80 */ 81 protected int maxTokenType = Label.MIN_TOKEN_TYPE-1; 82 83 /** Map token like ID (but not literals like "while") to its token type */ 84 public Map tokenIDToTypeMap = new LinkedHashMap(); 85 86 /** Map token literals like "while" to its token type. It may be that 87 * WHILE="while"=35, in which case both tokenIDToTypeMap and this 88 * field will have entries both mapped to 35. 89 */ 90 public Map<String, Integer> stringLiteralToTypeMap = new LinkedHashMap<String, Integer>(); 91 /** Reverse index for stringLiteralToTypeMap */ 92 public Vector<String> typeToStringLiteralList = new Vector<String>(); 93 94 /** Map a token type to its token name. 95 * Must subtract MIN_TOKEN_TYPE from index. 96 */ 97 public Vector<String> typeToTokenList = new Vector<String>(); 98 99 /** If combined or lexer grammar, track the rules. 100 * Track lexer rules so we can warn about undefined tokens. 101 * This is combined set of lexer rules from all lexer grammars 102 * seen in all imports. 103 */ 104 protected Set<String> lexerRules = new HashSet<String>(); 105 106 /** Rules are uniquely labeled from 1..n among all grammars */ 107 protected int ruleIndex = MIN_RULE_INDEX; 108 109 /** Map a rule index to its name; use a Vector on purpose as new 110 * collections stuff won't let me setSize and make it grow. :( 111 * I need a specific guaranteed index, which the Collections stuff 112 * won't let me have. 113 */ 114 protected Vector<Rule> ruleIndexToRuleList = new Vector<Rule>(); 115 116 public boolean watchNFAConversion = false; 117 initTokenSymbolTables()118 protected void initTokenSymbolTables() { 119 // the faux token types take first NUM_FAUX_LABELS positions 120 // then we must have room for the predefined runtime token types 121 // like DOWN/UP used for tree parsing. 122 typeToTokenList.setSize(Label.NUM_FAUX_LABELS+Label.MIN_TOKEN_TYPE-1); 123 typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.INVALID, "<INVALID>"); 124 typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOT, "<EOT>"); 125 typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SEMPRED, "<SEMPRED>"); 126 typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SET, "<SET>"); 127 typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR); 128 typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOF, "EOF"); 129 typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOR_TOKEN_TYPE-1, "<EOR>"); 130 typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.DOWN-1, "DOWN"); 131 typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.UP-1, "UP"); 132 tokenIDToTypeMap.put("<INVALID>", Utils.integer(Label.INVALID)); 133 tokenIDToTypeMap.put("<EOT>", Utils.integer(Label.EOT)); 134 tokenIDToTypeMap.put("<SEMPRED>", Utils.integer(Label.SEMPRED)); 135 tokenIDToTypeMap.put("<SET>", Utils.integer(Label.SET)); 136 tokenIDToTypeMap.put("<EPSILON>", Utils.integer(Label.EPSILON)); 137 tokenIDToTypeMap.put("EOF", Utils.integer(Label.EOF)); 138 tokenIDToTypeMap.put("<EOR>", Utils.integer(Label.EOR_TOKEN_TYPE)); 139 tokenIDToTypeMap.put("DOWN", Utils.integer(Label.DOWN)); 140 tokenIDToTypeMap.put("UP", Utils.integer(Label.UP)); 141 } 142 CompositeGrammar()143 public CompositeGrammar() { 144 initTokenSymbolTables(); 145 } 146 CompositeGrammar(Grammar g)147 public CompositeGrammar(Grammar g) { 148 this(); 149 setDelegationRoot(g); 150 } 151 setDelegationRoot(Grammar root)152 public void setDelegationRoot(Grammar root) { 153 delegateGrammarTreeRoot = new CompositeGrammarTree(root); 154 root.compositeTreeNode = delegateGrammarTreeRoot; 155 } 156 getRule(String ruleName)157 public Rule getRule(String ruleName) { 158 return delegateGrammarTreeRoot.getRule(ruleName); 159 } 160 getOption(String key)161 public Object getOption(String key) { 162 return delegateGrammarTreeRoot.getOption(key); 163 } 164 165 /** Add delegate grammar as child of delegator */ addGrammar(Grammar delegator, Grammar delegate)166 public void addGrammar(Grammar delegator, Grammar delegate) { 167 if ( delegator.compositeTreeNode==null ) { 168 delegator.compositeTreeNode = new CompositeGrammarTree(delegator); 169 } 170 delegator.compositeTreeNode.addChild(new CompositeGrammarTree(delegate)); 171 172 /*// find delegator in tree so we can add a child to it 173 CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(delegator); 174 t.addChild(); 175 */ 176 // make sure new grammar shares this composite 177 delegate.composite = this; 178 } 179 180 /** Get parent of this grammar */ getDelegator(Grammar g)181 public Grammar getDelegator(Grammar g) { 182 CompositeGrammarTree me = delegateGrammarTreeRoot.findNode(g); 183 if ( me==null ) { 184 return null; // not found 185 } 186 if ( me.parent!=null ) { 187 return me.parent.grammar; 188 } 189 return null; 190 } 191 192 /** Get list of all delegates from all grammars in the delegate subtree of g. 193 * The grammars are in delegation tree preorder. Don't include g itself 194 * in list as it is not a delegate of itself. 195 */ getDelegates(Grammar g)196 public List<Grammar> getDelegates(Grammar g) { 197 CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g); 198 if ( t==null ) { 199 return null; // no delegates 200 } 201 List<Grammar> grammars = t.getPostOrderedGrammarList(); 202 grammars.remove(grammars.size()-1); // remove g (last one) 203 return grammars; 204 } 205 getDirectDelegates(Grammar g)206 public List<Grammar> getDirectDelegates(Grammar g) { 207 CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g); 208 List<CompositeGrammarTree> children = t.children; 209 if ( children==null ) { 210 return null; 211 } 212 List<Grammar> grammars = new ArrayList(); 213 for (int i = 0; children!=null && i < children.size(); i++) { 214 CompositeGrammarTree child = (CompositeGrammarTree) children.get(i); 215 grammars.add(child.grammar); 216 } 217 return grammars; 218 } 219 220 /** Get delegates below direct delegates of g */ getIndirectDelegates(Grammar g)221 public List<Grammar> getIndirectDelegates(Grammar g) { 222 List<Grammar> direct = getDirectDelegates(g); 223 List<Grammar> delegates = getDelegates(g); 224 delegates.removeAll(direct); 225 return delegates; 226 } 227 228 /** Return list of delegate grammars from root down to g. 229 * Order is root, ..., g.parent. (g not included). 230 */ getDelegators(Grammar g)231 public List<Grammar> getDelegators(Grammar g) { 232 if ( g==delegateGrammarTreeRoot.grammar ) { 233 return null; 234 } 235 List<Grammar> grammars = new ArrayList(); 236 CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(g); 237 // walk backwards to root, collecting grammars 238 CompositeGrammarTree p = t.parent; 239 while ( p!=null ) { 240 grammars.add(0, p.grammar); // add to head so in order later 241 p = p.parent; 242 } 243 return grammars; 244 } 245 246 /** Get set of rules for grammar g that need to have manual delegation 247 * methods. This is the list of rules collected from all direct/indirect 248 * delegates minus rules overridden in grammar g. 249 * 250 * This returns null except for the delegate root because it is the only 251 * one that has to have a complete grammar rule interface. The delegates 252 * should not be instantiated directly for use as parsers (you can create 253 * them to pass to the root parser's ctor as arguments). 254 */ getDelegatedRules(Grammar g)255 public Set<Rule> getDelegatedRules(Grammar g) { 256 if ( g!=delegateGrammarTreeRoot.grammar ) { 257 return null; 258 } 259 Set<Rule> rules = getAllImportedRules(g); 260 for (Iterator it = rules.iterator(); it.hasNext();) { 261 Rule r = (Rule) it.next(); 262 Rule localRule = g.getLocallyDefinedRule(r.name); 263 // if locally defined or it's not local but synpred, don't make 264 // a delegation method 265 if ( localRule!=null || r.isSynPred ) { 266 it.remove(); // kill overridden rules 267 } 268 } 269 return rules; 270 } 271 272 /** Get all rule definitions from all direct/indirect delegate grammars 273 * of g. 274 */ getAllImportedRules(Grammar g)275 public Set<Rule> getAllImportedRules(Grammar g) { 276 Set<String> ruleNames = new HashSet(); 277 Set<Rule> rules = new HashSet(); 278 CompositeGrammarTree subtreeRoot = delegateGrammarTreeRoot.findNode(g); 279 280 List<Grammar> grammars = subtreeRoot.getPreOrderedGrammarList(); 281 // walk all grammars preorder, priority given to grammar listed first. 282 for (int i = 0; i < grammars.size(); i++) { 283 Grammar delegate = (org.antlr.tool.Grammar) grammars.get(i); 284 // for each rule in delegate, add to rules if no rule with that 285 // name as been seen. (can't use removeAll; wrong hashcode/equals on Rule) 286 for (Iterator it = delegate.getRules().iterator(); it.hasNext();) { 287 Rule r = (Rule)it.next(); 288 if ( !ruleNames.contains(r.name) ) { 289 ruleNames.add(r.name); // track that we've seen this 290 rules.add(r); 291 } 292 } 293 } 294 return rules; 295 } 296 getRootGrammar()297 public Grammar getRootGrammar() { 298 if ( delegateGrammarTreeRoot==null ) { 299 return null; 300 } 301 return delegateGrammarTreeRoot.grammar; 302 } 303 getGrammar(String grammarName)304 public Grammar getGrammar(String grammarName) { 305 CompositeGrammarTree t = delegateGrammarTreeRoot.findNode(grammarName); 306 if ( t!=null ) { 307 return t.grammar; 308 } 309 return null; 310 } 311 312 // NFA spans multiple grammars, must handle here 313 getNewNFAStateNumber()314 public int getNewNFAStateNumber() { 315 return stateCounter++; 316 } 317 addState(NFAState state)318 public void addState(NFAState state) { 319 numberToStateList.setSize(state.stateNumber+1); // make sure we have room 320 numberToStateList.set(state.stateNumber, state); 321 } 322 getState(int s)323 public NFAState getState(int s) { 324 return (NFAState)numberToStateList.get(s); 325 } 326 assignTokenTypes()327 public void assignTokenTypes() throws RecognitionException { 328 // ASSIGN TOKEN TYPES for all delegates (same walker) 329 //System.out.println("### assign types"); 330 AssignTokenTypesWalker ttypesWalker = new AssignTokenTypesBehavior(); 331 List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); 332 for (int i = 0; grammars!=null && i < grammars.size(); i++) { 333 Grammar g = (Grammar)grammars.get(i); 334 ttypesWalker.setTreeNodeStream(new CommonTreeNodeStream(g.getGrammarTree())); 335 try { 336 //System.out.println(" walking "+g.name); 337 ttypesWalker.grammar_(g); 338 } 339 catch (RecognitionException re) { 340 ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, 341 re); 342 } 343 } 344 // the walker has filled literals, tokens, and alias tables. 345 // now tell it to define them in the root grammar 346 ttypesWalker.defineTokens(delegateGrammarTreeRoot.grammar); 347 } 348 translateLeftRecursiveRules()349 public void translateLeftRecursiveRules() { 350 List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); 351 for (int i = 0; grammars!=null && i < grammars.size(); i++) { 352 Grammar g = grammars.get(i); 353 if ( !(g.type==Grammar.PARSER || g.type==Grammar.COMBINED) ) continue; 354 for (GrammarAST r : g.grammarTree.findAllType(ANTLRParser.RULE)) { 355 if ( !Character.isUpperCase(r.getChild(0).getText().charAt(0)) ) { 356 if ( LeftRecursiveRuleAnalyzer.hasImmediateRecursiveRuleRefs(r, r.enclosingRuleName) ) { 357 g.translateLeftRecursiveRule(r); 358 } 359 } 360 } 361 } 362 } 363 defineGrammarSymbols()364 public void defineGrammarSymbols() { 365 delegateGrammarTreeRoot.trimLexerImportsIntoCombined(); 366 List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); 367 for (int i = 0; grammars!=null && i < grammars.size(); i++) { 368 Grammar g = grammars.get(i); 369 g.defineGrammarSymbols(); 370 } 371 for (int i = 0; grammars!=null && i < grammars.size(); i++) { 372 Grammar g = grammars.get(i); 373 g.checkNameSpaceAndActions(); 374 } 375 minimizeRuleSet(); 376 } 377 createNFAs()378 public void createNFAs() { 379 if ( ErrorManager.doNotAttemptAnalysis() ) { 380 return; 381 } 382 List<Grammar> grammars = delegateGrammarTreeRoot.getPostOrderedGrammarList(); 383 //System.out.println("### createNFAs for composite; grammars: "+names); 384 for (int i = 0; grammars!=null && i < grammars.size(); i++) { 385 Grammar g = (Grammar)grammars.get(i); 386 g.createRuleStartAndStopNFAStates(); 387 } 388 for (int i = 0; grammars!=null && i < grammars.size(); i++) { 389 Grammar g = (Grammar)grammars.get(i); 390 g.buildNFA(); 391 } 392 } 393 minimizeRuleSet()394 public void minimizeRuleSet() { 395 Set<String> ruleDefs = new HashSet<String>(); 396 _minimizeRuleSet(ruleDefs, delegateGrammarTreeRoot); 397 } 398 _minimizeRuleSet(Set<String> ruleDefs, CompositeGrammarTree p)399 public void _minimizeRuleSet(Set<String> ruleDefs, 400 CompositeGrammarTree p) { 401 Set<String> localRuleDefs = new HashSet<String>(); 402 Set<String> overrides = new HashSet<String>(); 403 // compute set of non-overridden rules for this delegate 404 for (Rule r : p.grammar.getRules()) { 405 if ( !ruleDefs.contains(r.name) ) { 406 localRuleDefs.add(r.name); 407 } 408 else if ( !r.name.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) { 409 // record any overridden rule 'cept tokens rule 410 overrides.add(r.name); 411 } 412 } 413 //System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs); 414 //System.out.println("overridden rule for "+p.grammar.name+": "+overrides); 415 p.grammar.overriddenRules = overrides; 416 417 // make set of all rules defined thus far walking delegation tree. 418 // the same rule in two delegates resolves in favor of first found 419 // in tree therefore second must not be included 420 ruleDefs.addAll(localRuleDefs); 421 422 // pass larger set of defined rules to delegates 423 if ( p.children!=null ) { 424 for (CompositeGrammarTree delegate : p.children) { 425 _minimizeRuleSet(ruleDefs, delegate); 426 } 427 } 428 } 429 430 /* 431 public void minimizeRuleSet() { 432 Set<Rule> refs = _minimizeRuleSet(delegateGrammarTreeRoot); 433 System.out.println("all rule refs: "+refs); 434 } 435 436 public Set<Rule> _minimizeRuleSet(CompositeGrammarTree p) { 437 Set<Rule> refs = new HashSet<Rule>(); 438 for (GrammarAST refAST : p.grammar.ruleRefs) { 439 System.out.println("ref "+refAST.getText()+": "+refAST.NFAStartState+ 440 " enclosing rule: "+refAST.NFAStartState.enclosingRule+ 441 " invoking rule: "+((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule); 442 refs.add(((NFAState)refAST.NFAStartState.transition[0].target).enclosingRule); 443 } 444 445 if ( p.children!=null ) { 446 for (CompositeGrammarTree delegate : p.children) { 447 Set<Rule> delegateRuleRefs = _minimizeRuleSet(delegate); 448 refs.addAll(delegateRuleRefs); 449 } 450 } 451 452 return refs; 453 } 454 */ 455 456 /* 457 public void oldminimizeRuleSet() { 458 // first walk to remove all overridden rules 459 Set<String> ruleDefs = new HashSet<String>(); 460 Set<String> ruleRefs = new HashSet<String>(); 461 for (GrammarAST refAST : delegateGrammarTreeRoot.grammar.ruleRefs) { 462 String rname = refAST.getText(); 463 ruleRefs.add(rname); 464 } 465 _minimizeRuleSet(ruleDefs, 466 ruleRefs, 467 delegateGrammarTreeRoot); 468 System.out.println("overall rule defs: "+ruleDefs); 469 } 470 471 public void _minimizeRuleSet(Set<String> ruleDefs, 472 Set<String> ruleRefs, 473 CompositeGrammarTree p) { 474 Set<String> localRuleDefs = new HashSet<String>(); 475 for (Rule r : p.grammar.getRules()) { 476 if ( !ruleDefs.contains(r.name) ) { 477 localRuleDefs.add(r.name); 478 ruleDefs.add(r.name); 479 } 480 } 481 System.out.println("rule defs for "+p.grammar.name+": "+localRuleDefs); 482 483 // remove locally-defined rules not in ref set 484 // find intersection of local rules and references from delegator 485 // that is set of rules needed by delegator 486 Set<String> localRuleDefsSatisfyingRefsFromBelow = new HashSet<String>(); 487 for (String r : ruleRefs) { 488 if ( localRuleDefs.contains(r) ) { 489 localRuleDefsSatisfyingRefsFromBelow.add(r); 490 } 491 } 492 493 // now get list of refs from localRuleDefsSatisfyingRefsFromBelow. 494 // Those rules are also allowed in this delegate 495 for (GrammarAST refAST : p.grammar.ruleRefs) { 496 if ( localRuleDefsSatisfyingRefsFromBelow.contains(refAST.enclosingRuleName) ) { 497 // found rule ref within needed rule 498 } 499 } 500 501 // remove rule refs not in the new rule def set 502 503 // walk all children, adding rules not already defined 504 if ( p.children!=null ) { 505 for (CompositeGrammarTree delegate : p.children) { 506 _minimizeRuleSet(ruleDefs, ruleRefs, delegate); 507 } 508 } 509 } 510 */ 511 512 /* 513 public void trackNFAStatesThatHaveLabeledEdge(Label label, 514 NFAState stateWithLabeledEdge) 515 { 516 Set<NFAState> states = typeToNFAStatesWithEdgeOfTypeMap.get(label); 517 if ( states==null ) { 518 states = new HashSet<NFAState>(); 519 typeToNFAStatesWithEdgeOfTypeMap.put(label, states); 520 } 521 states.add(stateWithLabeledEdge); 522 } 523 524 public Map<Label, Set<NFAState>> getTypeToNFAStatesWithEdgeOfTypeMap() { 525 return typeToNFAStatesWithEdgeOfTypeMap; 526 } 527 528 public Set<NFAState> getStatesWithEdge(Label label) { 529 return typeToNFAStatesWithEdgeOfTypeMap.get(label); 530 } 531 */ 532 } 533