/*
 * [The "BSD license"]
 * Copyright (c) 2010 Terence Parr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.tool;

import org.antlr.Tool;
import org.antlr.analysis.DFA;
import org.antlr.analysis.DFAState;
import org.antlr.analysis.LL1Analyzer;
import org.antlr.analysis.LL1DFA;
import org.antlr.analysis.Label;
import org.antlr.analysis.LookaheadSet;
import org.antlr.analysis.NFA;
import org.antlr.analysis.NFAConversionThread;
import org.antlr.analysis.NFAState;
import org.antlr.analysis.NFAToDFAConverter;
import org.antlr.analysis.SemanticContext;
import org.antlr.analysis.Transition;
import org.antlr.codegen.CodeGenerator;
import org.antlr.codegen.Target;
import org.antlr.grammar.v3.ANTLRLexer;
import org.antlr.grammar.v3.ANTLRParser;
import org.antlr.grammar.v3.ANTLRTreePrinter;
import org.antlr.grammar.v3.ActionAnalysis;
import org.antlr.grammar.v3.DefineGrammarItemsWalker;
import org.antlr.grammar.v3.TreeToNFAConverter;
import org.antlr.misc.Barrier;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.misc.MultiMap;
import org.antlr.misc.OrderedHashSet;
import org.antlr.misc.Utils;
import org.antlr.runtime.ANTLRReaderStream;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.Token;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.STGroupString;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

/** Represents a grammar in memory. */
public class Grammar {
    public static final String SYNPRED_RULE_PREFIX = "synpred";

    public static final String GRAMMAR_FILE_EXTENSION = ".g";

    /** used for generating lexer temp files */
    public static final String LEXER_GRAMMAR_FILE_EXTENSION = ".g";

    public static final int INITIAL_DECISION_LIST_SIZE = 300;
    public static final int INVALID_RULE_INDEX = -1;

    // the various kinds of labels. t=type, id=ID, types+=type, ids+=ID
    public static final int RULE_LABEL = 1;
    public static final int TOKEN_LABEL = 2;
    public static final int RULE_LIST_LABEL = 3;
    public static final int TOKEN_LIST_LABEL = 4;
    public static final int CHAR_LABEL = 5; // used in lexer for x='a'
    public static final int WILDCARD_TREE_LABEL = 6; // used in tree grammar x=.
    public static final int WILDCARD_TREE_LIST_LABEL = 7; // used in tree grammar x+=.

    public static String[] LabelTypeToString =
        {"<invalid>", "rule", "token", "rule-list", "token-list", "wildcard-tree", "wildcard-tree-list"};

    public static final String ARTIFICIAL_TOKENS_RULENAME = "Tokens";
    public static final String FRAGMENT_RULE_MODIFIER = "fragment";

    public static final String SYNPREDGATE_ACTION_NAME = "synpredgate";

    /** When converting ANTLR char and string literals, here is the
     *  value set of escape chars.
     */
    public static int ANTLRLiteralEscapedCharValue[] = new int[255];

    /** Given a char, we need to be able to show it as an ANTLR literal.
     */
    public static String ANTLRLiteralCharValueEscape[] = new String[255];

    static {
        ANTLRLiteralEscapedCharValue['n'] = '\n';
        ANTLRLiteralEscapedCharValue['r'] = '\r';
        ANTLRLiteralEscapedCharValue['t'] = '\t';
        ANTLRLiteralEscapedCharValue['b'] = '\b';
        ANTLRLiteralEscapedCharValue['f'] = '\f';
        ANTLRLiteralEscapedCharValue['\\'] = '\\';
        ANTLRLiteralEscapedCharValue['\''] = '\'';
        ANTLRLiteralEscapedCharValue['"'] = '"';
        ANTLRLiteralCharValueEscape['\n'] = "\\n";
        ANTLRLiteralCharValueEscape['\r'] = "\\r";
        ANTLRLiteralCharValueEscape['\t'] = "\\t";
        ANTLRLiteralCharValueEscape['\b'] = "\\b";
        ANTLRLiteralCharValueEscape['\f'] = "\\f";
        ANTLRLiteralCharValueEscape['\\'] = "\\\\";
        ANTLRLiteralCharValueEscape['\''] = "\\'";
    }
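
    // Illustrative only (not in the original source): the two tables are
    // inverses over the escapable characters, so lookups in each direction
    // behave like this:
    //
    //   ANTLRLiteralEscapedCharValue['n'] == '\n'   // "\n" in a literal means newline
    //   ANTLRLiteralCharValueEscape['\n'].equals("\\n") // newline prints as \n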
    public static final int LEXER = 1;
    public static final int PARSER = 2;
    public static final int TREE_PARSER = 3;
    public static final int COMBINED = 4;
    public static final String[] grammarTypeToString = new String[] {
        "<invalid>",
        "lexer",
        "parser",
        "tree",
        "combined"
    };

    public static final String[] grammarTypeToFileNameSuffix = new String[] {
        "<invalid>",
        "Lexer",
        "Parser",
        "",       // no suffix for tree grammars
        "Parser"  // if combined grammar, gen Parser and Lexer will be done later
    };

    /** Set of valid imports. E.g., can only import a tree parser into
     *  another tree parser. Maps delegate to set of delegator grammar types.
     *  validDelegations.get(LEXER) gives the list of the kinds of delegators
     *  that can import lexers.
     */
    public static MultiMap<Integer,Integer> validDelegations =
        new MultiMap<Integer,Integer>() {
            {
                map(LEXER, LEXER);
                map(LEXER, PARSER);
                map(LEXER, COMBINED);

                map(PARSER, PARSER);
                map(PARSER, COMBINED);

                map(TREE_PARSER, TREE_PARSER);

                // TODO: allow COMBINED
                // map(COMBINED, COMBINED);
            }
        };
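
    // A quick sketch of how the table reads (example added for clarity, not
    // part of the original source): a lexer may be imported by a lexer,
    // parser, or combined grammar, while nothing may import a combined
    // grammar yet.
    //
    //   validDelegations.get(LEXER);    // => [LEXER, PARSER, COMBINED]
    //   validDelegations.get(COMBINED); // => null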
    /** This is the buffer of *all* tokens found in the grammar file
     *  including whitespace tokens etc... I use this to extract
     *  lexer rules from combined grammars.
     */
    public CommonTokenStream tokenBuffer;
    public static final String IGNORE_STRING_IN_GRAMMAR_FILE_NAME = "__";
    public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";

    public static class Decision {
        public Grammar grammar;
        public int decision;
        public NFAState startState;
        public GrammarAST blockAST;
        public DFA dfa;
    }

    public class LabelElementPair {
        public Token label;
        public GrammarAST elementRef;
        public String referencedRuleName;
        /** Has an action referenced the label? Set by ActionAnalysis.g
         *  Currently only set for rule labels.
         */
        public boolean actionReferencesLabel;
        public int type; // in {RULE_LABEL,TOKEN_LABEL,RULE_LIST_LABEL,TOKEN_LIST_LABEL}

        public LabelElementPair(Token label, GrammarAST elementRef) {
            this.label = label;
            this.elementRef = elementRef;
            this.referencedRuleName = elementRef.getText();
        }

        public Rule getReferencedRule() {
            return getRule(referencedRuleName);
        }

        @Override
        public String toString() {
            return elementRef.toString();
        }
    }

    /** What name did the user provide for this grammar? */
    public String name;

    /** What type of grammar is this: lexer, parser, tree walker */
    public int type;

    /** A list of options specified at the grammar level such as language=Java.
     *  The value can be an AST for complicated values such as character sets.
     *  There may be code generator specific options in here. I do no
     *  interpretation of the key/value pairs...they are simply available for
     *  whoever wants them.
     */
    protected Map<String, Object> options;

    public static final Set<String> legalLexerOptions =
        new HashSet<String>() {
            {
                add("language"); add("tokenVocab");
                add("TokenLabelType");
                add("superClass");
                add("filter");
                add("k");
                add("backtrack");
                add("memoize");
            }
        };

    public static final Set<String> legalParserOptions =
        new HashSet<String>() {
            {
                add("language"); add("tokenVocab");
                add("output"); add("rewrite"); add("ASTLabelType");
                add("TokenLabelType");
                add("superClass");
                add("k");
                add("backtrack");
                add("memoize");
            }
        };

    public static final Set<String> legalTreeParserOptions =
        new HashSet<String>() {
            {
                add("language"); add("tokenVocab");
                add("output"); add("rewrite"); add("ASTLabelType");
                add("TokenLabelType");
                add("superClass");
                add("k");
                add("backtrack");
                add("memoize");
                add("filter");
            }
        };

    public static final Set<String> doNotCopyOptionsToLexer =
        new HashSet<String>() {
            {
                add("output"); add("ASTLabelType"); add("superClass");
                add("k"); add("backtrack"); add("memoize"); add("rewrite");
            }
        };

    public static final Map<String, String> defaultOptions =
        new HashMap<String, String>() {
            {
                put("language","Java");
            }
        };

    public static final Set<String> legalBlockOptions =
        new HashSet<String>() {{add("k"); add("greedy"); add("backtrack"); add("memoize");}};

    /** What are the default options for a subrule? */
    public static final Map<String, String> defaultBlockOptions =
        new HashMap<String, String>() {{put("greedy","true");}};

    public static final Map<String, String> defaultLexerBlockOptions =
        new HashMap<String, String>() {{put("greedy","true");}};

    // Token options are here to avoid contaminating Token object in runtime

    /** Legal options for terminal refs like ID<node=MyVarNode> */
    public static final Set<String> legalTokenOptions =
        new HashSet<String>() {
            {
                add(defaultTokenOption);
                add("type");
                add("text");
                add("assoc");
            }
        };

    public static final String defaultTokenOption = "node";

    /** Is there a global fixed lookahead set for this grammar?
     *  If 0, nothing specified. -1 implies we have not looked at
     *  the options table yet to set k.
     */
    protected int global_k = -1;

    /** Map a scope to a map of name:action pairs.
     *  Map<String, Map<String,GrammarAST>>
     *  The code generator will use this to fill holes in the output files.
     *  I track the AST node for the action in case I need the line number
     *  for errors.
     */
    private Map<String, Map<String, Object>> actions =
        new HashMap<String, Map<String, Object>>();
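
    // Shape of the actions table, sketched as an example (not in the
    // original source). Given "@parser::header { import java.util.*; }"
    // the table would contain:
    //
    //   actions.get("parser")               // => {"header" -> <ACTION AST>}
    //   actions.get("parser").get("header") // the AST carrying the action text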
    /** The NFA that represents the grammar with edges labelled with tokens
     *  or epsilon. It is more suitable to analysis than an AST representation.
     */
    public NFA nfa;

    protected NFAFactory factory;

    /** If this grammar is part of a larger composite grammar via delegate
     *  statement, then this points at the composite. The composite holds
     *  a global list of rules, token types, decision numbers, etc...
     */
    public CompositeGrammar composite;

    /** A pointer back into grammar tree. Needed so we can add delegates. */
    public CompositeGrammarTree compositeTreeNode;

    /** If this is a delegate of another grammar, this is the label used
     *  as an instance var by that grammar to point at this grammar. null
     *  if no label was specified in the delegate statement.
     */
    public String label;

    /** TODO: hook this to the charVocabulary option */
    protected IntSet charVocabulary = null;

    /** For ANTLRWorks, we want to be able to map a line:col to a specific
     *  decision DFA so it can display the DFA.
     */
    Map<String, DFA> lineColumnToLookaheadDFAMap = new HashMap<String, DFA>();

    public Tool tool;

    /** The unique set of all rule references in any rule; set of tree node
     *  objects so two refs to the same rule can exist but at different line/position.
     */
    protected Set<GrammarAST> ruleRefs = new HashSet<GrammarAST>();

    protected Set<GrammarAST> scopedRuleRefs = new HashSet<GrammarAST>();

    /** The unique set of all token ID references in any rule */
    protected Set<Token> tokenIDRefs = new HashSet<Token>();

    /** Be able to assign a number to every decision in grammar;
     *  decisions in 1..n
     */
    protected int decisionCount = 0;

    /** A list of all rules that are in any left-recursive cycle. There
     *  could be multiple cycles, but this is a flat list of all problematic
     *  rules. This is stuff we couldn't refactor to a precedence rule.
     */
    protected Set<Rule> leftRecursiveRules;

    /** An external tool requests that DFA analysis abort prematurely. Aborts
     *  at DFA granularity; DFAs are also limited by size and computation
     *  time as a failsafe.
     */
    protected boolean externalAnalysisAbort;

    public int numNonLLStar = 0; // hack to track for -report

    /** When we read in a grammar, we track the list of syntactic predicates
     *  and build faux rules for them later. See my blog entry Dec 2, 2005:
     *  http://www.antlr.org/blog/antlr3/lookahead.tml
     *  This maps the name (we make up) for a pred to the AST grammar fragment.
     */
    protected LinkedHashMap<String, GrammarAST> nameToSynpredASTMap;

    /** Each left-recursive precedence rule must define a precedence array
     *  for binary operators like:
     *
     *    static int[] e_prec = new int[tokenNames.length];
     *    static {
     *        e_prec[75] = 1;
     *    }
     *
     *  Track these and push them into the parser later; this is computed
     *  early when we look for prec rules.
     */
    public List<String> precRuleInitCodeBlocks = new ArrayList<String>();

    /** At least one rule has memoize=true */
    public boolean atLeastOneRuleMemoizes;

    /** At least one backtrack=true in rule or decision or grammar. */
    public boolean atLeastOneBacktrackOption;

    /** Was this created from a COMBINED grammar? */
    public boolean implicitLexer;

    /** Map a rule name to its Rule object */
    protected LinkedHashMap<String,Rule> nameToRuleMap = new LinkedHashMap<String,Rule>();

    /** If this grammar is a delegate, some rules might be overridden; don't
     *  want to gen code for them.
     */
    public Set<String> overriddenRules = new HashSet<String>();

    /** The list of all rules referenced in this grammar, not defined here,
     *  and defined in a delegate grammar. Not all of these will be generated
     *  in the recognizer for this file; only those that are affected by rule
     *  definitions in this grammar. I am not sure the Java target will need
     *  this but I'm leaving it in case other targets need it.
     *  See NameSpaceChecker.lookForReferencesToUndefinedSymbols()
     */
    protected Set<Rule> delegatedRuleReferences = new HashSet<Rule>();
    /** The ANTLRParser tracks lexer rules when reading combined grammars
     *  so we can build the Tokens rule.
     */
    public List<String> lexerRuleNamesInCombined = new ArrayList<String>();

    /** Track the scopes defined outside of rules and the scopes associated
     *  with all rules (even if empty).
     */
    protected Map<String, AttributeScope> scopes = new HashMap<String, AttributeScope>();

    /** An AST that records the entire input grammar with all rules. A simple
     *  grammar with one rule, "grammar t; a : A | B ;", looks like:
     *  ( grammar t ( rule a ( BLOCK ( ALT A ) ( ALT B ) ) <end-of-rule> ) )
     */
    protected GrammarAST grammarTree = null;

    /** Each subrule/rule is a decision point and we must track them so we
     *  can go back later and build DFA predictors for them. This includes
     *  all the rules, subrules, optional blocks, ()+, ()* etc...
     */
    protected Vector<Decision> indexToDecision =
        new Vector<Decision>(INITIAL_DECISION_LIST_SIZE);

    /** If non-null, this is the code generator we will use to generate
     *  recognizers in the target language.
     */
    protected CodeGenerator generator;

    public NameSpaceChecker nameSpaceChecker = new NameSpaceChecker(this);

    public LL1Analyzer ll1Analyzer = new LL1Analyzer(this);

    /** For merged lexer/parsers, we must construct a separate lexer spec.
     *  This is the template for the lexer; put the literals first, then the
     *  regular rules. We don't need to specify a token vocab import as
     *  I make the new grammar import from the old all in memory; don't want
     *  to force it to read from the disk. The lexer grammar will have the same
     *  name as the original grammar but will be in a different file. Foo.g
     *  with a combined grammar will have FooParser.java generated, and the
     *  extracted Foo__.g will still say Foo inside. It will however generate
     *  FooLexer.java as it's a lexer grammar. A bit odd, but autogenerated.
     *  Can tweak later if we want.
     */
    protected String lexerGrammarTemplate =
            "grammar(name, options, imports, actionNames, actions, literals, rules) ::= <<\n" +
            "lexer grammar <name>;\n" +
            "<if(options)>" +
            "options {\n" +
            "    <options:{it | <it.name>=<it.value>;<\\n>}>\n" +
            "}<\\n>\n" +
            "<endif>\n" +
            "<if(imports)>import <imports; separator=\", \">;<endif>\n" +
            "<actionNames,actions:{n,a|@<n> {<a>\\}\n}>\n" +
            "<literals:{it | <it.ruleName> : <it.literal> ;\n}>\n" +
            "<rules>\n" +
            ">>\n";
    protected ST lexerGrammarST;
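
    // For orientation, here is roughly what the template above renders to
    // for a combined grammar "grammar Foo;" that defined the literal '+'
    // (auto-named token) plus a lexer rule ID. The output is reconstructed
    // by hand, so treat names and whitespace as approximate:
    //
    //   lexer grammar Foo;
    //   T__10 : '+' ;
    //   ID : ('a'..'z')+ ;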
    /** What file name holds this grammar? */
    protected String fileName;

    /** How long in ms did it take to build DFAs for this grammar?
     *  If this grammar is a combined grammar, it only records time for
     *  the parser grammar component. This only records the time to
     *  do the LL(*) work; NFA→DFA conversion.
     */
    public long DFACreationWallClockTimeInMS;

    public int numberOfSemanticPredicates = 0;
    public int numberOfManualLookaheadOptions = 0;
    public Set<Integer> setOfNondeterministicDecisionNumbers = new HashSet<Integer>();
    public Set<Integer> setOfNondeterministicDecisionNumbersResolvedWithPredicates =
        new HashSet<Integer>();

    /** Track decisions with syn preds specified for reporting.
     *  This is a set of BLOCK-type AST nodes.
     */
    public Set<GrammarAST> blocksWithSynPreds = new HashSet<GrammarAST>();

    /** Track decisions that actually use the syn preds in the DFA.
     *  Computed during NFA to DFA conversion.
     */
    public Set<DFA> decisionsWhoseDFAsUsesSynPreds = new HashSet<DFA>();

    /** Track names of preds so we can avoid generating preds that aren't used.
     *  Computed during NFA to DFA conversion. Just walk accept states
     *  and look for synpreds because that is the only state target whose
     *  incident edges can have synpreds. The same is true for
     *  decisionsWhoseDFAsUsesSynPreds.
     */
    public Set<String> synPredNamesUsedInDFA = new HashSet<String>();

    /** Track decisions with sem preds specified for reporting.
     *  This is a set of BLOCK-type AST nodes.
     */
    public Set<GrammarAST> blocksWithSemPreds = new HashSet<GrammarAST>();

    /** Track decisions that actually use the sem preds in the DFA. */
    public Set<DFA> decisionsWhoseDFAsUsesSemPreds = new HashSet<DFA>();

    protected boolean allDecisionDFACreated = false;

    /** We need a way to detect when a lexer grammar is autogenerated from
     *  another grammar or we are just sending in a string representing a
     *  grammar. We don't want to generate a .tokens file, for example,
     *  in such cases.
     */
    protected boolean builtFromString = false;

    /** Factored out the sanity checking code; delegate to it. */
    GrammarSanity sanity = new GrammarSanity(this);

    /** Useful for asking questions about target during analysis */
    Target target;

    /** Create a grammar from a file name. */
    public Grammar(Tool tool, String fileName, CompositeGrammar composite) {
        this.composite = composite;
        setTool(tool);
        setFileName(fileName);
        // ensure we have the composite set to something
        if ( composite.delegateGrammarTreeRoot==null ) {
            composite.setDelegationRoot(this);
        }
        STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate);
        lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar");
        target = CodeGenerator.loadLanguageTarget((String) getOption("language"));
    }

    /** Useful for when you are sure that you are not part of a composite
     *  already. Used in Interp/RandomPhrase and testing.
     */
    public Grammar() { this((Tool)null); }

    public Grammar(Tool tool) {
        setTool(tool);
        builtFromString = true;
        composite = new CompositeGrammar(this);
        STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate);
        lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar");
        target = CodeGenerator.loadLanguageTarget((String)getOption("language"));
    }

    /** Used for testing; only useful on noncomposite grammars. */
    public Grammar(String grammarString)
        throws RecognitionException
    {
        this(null, grammarString);
    }
    /** Used for testing and Interp/RandomPhrase. Only useful on
     *  noncomposite grammars.
     */
    public Grammar(Tool tool, String grammarString)
        throws RecognitionException
    {
        this(tool);
        setFileName("<string>");
        StringReader r = new StringReader(grammarString);
        parseAndBuildAST(r);
        composite.assignTokenTypes();
        //composite.translateLeftRecursiveRules();
        addRulesForSyntacticPredicates();
        composite.defineGrammarSymbols();
        //composite.createNFAs();
        checkNameSpaceAndActions();
    }
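
    // A minimal usage sketch for this constructor (illustrative, not part
    // of the original file): it parses the string, assigns token types, and
    // defines symbols, but does not run the expensive DFA analysis.
    //
    //   Grammar g = new Grammar("parser grammar T;\n" +
    //                           "a : B | C ;");
    //   g.buildNFA();             // see createRuleStartAndStopNFAStates()
    //   g.createLookaheadDFAs();  // only if you need the full analysis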
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    public String getFileName() {
        return fileName;
    }

    public void setName(String name) {
        if ( name==null ) {
            return;
        }
        // don't error check autogenerated files (those with '__' in them)
        String saneFile = fileName.replace('\\', '/');
        int lastSlash = saneFile.lastIndexOf('/');
        String onlyFileName = saneFile.substring(lastSlash+1, fileName.length());
        if ( !builtFromString ) {
            int lastDot = onlyFileName.lastIndexOf('.');
            String onlyFileNameNoSuffix;
            if ( lastDot < 0 ) {
                ErrorManager.error(ErrorManager.MSG_FILENAME_EXTENSION_ERROR, fileName);
                onlyFileNameNoSuffix = onlyFileName+GRAMMAR_FILE_EXTENSION;
            }
            else {
                onlyFileNameNoSuffix = onlyFileName.substring(0,lastDot);
            }
            if ( !name.equals(onlyFileNameNoSuffix) ) {
                ErrorManager.error(ErrorManager.MSG_FILE_AND_GRAMMAR_NAME_DIFFER,
                                   name,
                                   fileName);
            }
        }
        this.name = name;
    }

    public void setGrammarContent(String grammarString) throws RecognitionException {
        StringReader r = new StringReader(grammarString);
        parseAndBuildAST(r);
        composite.assignTokenTypes();
        composite.defineGrammarSymbols();
    }

    public void parseAndBuildAST()
        throws IOException
    {
        FileReader fr;
        BufferedReader br = null;
        try {
            fr = new FileReader(fileName);
            br = new BufferedReader(fr);
            parseAndBuildAST(br);
            br.close();
            br = null;
        }
        finally {
            if ( br!=null ) {
                br.close();
            }
        }
    }

    public void parseAndBuildAST(Reader r) {
        // BUILD AST FROM GRAMMAR
        ANTLRLexer lexer;
        try {
            lexer = new ANTLRLexer(new ANTLRReaderStream(r));
        } catch (IOException e) {
            ErrorManager.internalError("unexpected stream error from parsing "+fileName, e);
            return;
        }

        lexer.setFileName(this.getFileName());
        tokenBuffer = new CommonTokenStream(lexer);
        ANTLRParser parser = ANTLRParser.createParser(tokenBuffer);
        parser.setFileName(this.getFileName());
        ANTLRParser.grammar__return result = null;
        try {
            result = parser.grammar_(this);
        }
        catch (RecognitionException re) {
            ErrorManager.internalError("unexpected parser recognition error from "+fileName, re);
        }

        dealWithTreeFilterMode(); // tree grammar and filter=true?

        if ( lexer.hasASTOperator && !buildAST() ) {
            Object value = getOption("output");
            if ( value == null ) {
                ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_OR_OP_WITH_NO_OUTPUT_OPTION,
                                            this, null);
                setOption("output", "AST", null);
            }
            else {
                ErrorManager.grammarError(ErrorManager.MSG_AST_OP_WITH_NON_AST_OUTPUT_OPTION,
                                          this, null, value);
            }
        }

        setGrammarTree(result.getTree());

        //if ( grammarTree!=null ) System.out.println("grammar tree: "+grammarTree.toStringTree());

        grammarTree.setUnknownTokenBoundaries();

        setFileName(lexer.getFileName()); // the lexer #src might change name
        if ( grammarTree.findFirstType(ANTLRParser.RULE)==null ) {
            ErrorManager.error(ErrorManager.MSG_NO_RULES, getFileName());
        }
    }

    protected void dealWithTreeFilterMode() {
        Object filterMode = (String)getOption("filter");
        if ( type==TREE_PARSER && filterMode!=null && filterMode.toString().equals("true") ) {
            // check for conflicting options
            // filter => backtrack=true
            // filter&&output=AST => rewrite=true
            // filter&&output!=AST => error
            // any deviation from valid option set is an error
            Object backtrack = (String)getOption("backtrack");
            Object output = getOption("output");
            Object rewrite = getOption("rewrite");
            if ( backtrack!=null && !backtrack.toString().equals("true") ) {
                ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
                                   "backtrack", backtrack);
            }
            if ( output!=null && !output.toString().equals("AST") ) {
                ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
                                   "output", output);
                setOption("output", "", null);
            }
            if ( rewrite!=null && !rewrite.toString().equals("true") ) {
                ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
                                   "rewrite", rewrite);
            }
            // set options properly
            setOption("backtrack", "true", null);
            if ( output!=null && output.toString().equals("AST") ) {
                setOption("rewrite", "true", null);
            }
            // @synpredgate set to state.backtracking==1 by code gen when filter=true
            // superClass set in template target::treeParser
        }
    }
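
    // Effect of the normalization above, shown on a hypothetical tree
    // grammar (example only): given
    //
    //   tree grammar Walker;
    //   options { filter=true; output=AST; }
    //
    // dealWithTreeFilterMode() forces backtrack=true and rewrite=true, so
    // the effective option set becomes {filter, output=AST, backtrack,
    // rewrite}; an explicit backtrack=false or a non-AST output option
    // would instead be reported as a conflicting option.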
    public void translateLeftRecursiveRule(GrammarAST ruleAST) {
        //System.out.println(ruleAST.toStringTree());
        CommonTreeNodeStream input = new CommonTreeNodeStream(ruleAST);
        LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker =
            new LeftRecursiveRuleAnalyzer(input, this, ruleAST.enclosingRuleName);
        boolean isLeftRec = false;
        try {
            //System.out.println("TESTING "+ruleAST.enclosingRuleName);
            isLeftRec = leftRecursiveRuleWalker.rec_rule(this);
        }
        catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
        }
        if ( !isLeftRec ) return;
        List<String> rules = new ArrayList<String>();
        rules.add( leftRecursiveRuleWalker.getArtificialPrecStartRule() );
        rules.add( leftRecursiveRuleWalker.getArtificialOpPrecRule() );
        rules.add( leftRecursiveRuleWalker.getArtificialPrimaryRule() );
        for (String r : rules) {
            GrammarAST t = parseArtificialRule(r);
            addRule(grammarTree, t);
            //System.out.println(t.toStringTree());
        }

        //precRuleInitCodeBlocks.add( precRuleWalker.getOpPrecJavaCode() );
    }

    public void defineGrammarSymbols() {
        if ( Tool.internalOption_PrintGrammarTree ) {
            System.out.println(grammarTree.toStringList());
        }

        // DEFINE RULES
        //System.out.println("### define "+name+" rules");
        DefineGrammarItemsWalker defineItemsWalker =
            new DefineGrammarItemsWalker(new CommonTreeNodeStream(getGrammarTree()));
        try {
            defineItemsWalker.grammar_(this);
        }
        catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
                               re);
        }
    }

    /** ANALYZE ACTIONS, LOOKING FOR LABEL AND ATTR REFS, sanity check */
    public void checkNameSpaceAndActions() {
        examineAllExecutableActions();
        checkAllRulesForUselessLabels();

        nameSpaceChecker.checkConflicts();
    }

    /** Many imports are illegal, such as lexer into a tree grammar */
    public boolean validImport(Grammar delegate) {
        List<Integer> validDelegators = validDelegations.get(delegate.type);
        return validDelegators!=null && validDelegators.contains(this.type);
    }

    /** If the grammar is a combined grammar, return the text of the implicit
     *  lexer grammar.
     */
    public String getLexerGrammar() {
        if ( lexerGrammarST.getAttribute("literals")==null &&
             lexerGrammarST.getAttribute("rules")==null )
        {
            // if no rules, return nothing
            return null;
        }
        lexerGrammarST.add("name", name);
        // if there are any actions set for lexer, pass them in
        if ( getActions().get("lexer")!=null ) {
            lexerGrammarST.add("actionNames",
                               getActions().get("lexer").keySet());
            lexerGrammarST.add("actions",
                               getActions().get("lexer").values());
        }
        // make sure generated grammar has the same options
        if ( options!=null ) {
            for (String optionName : options.keySet()) {
                if ( !doNotCopyOptionsToLexer.contains(optionName) ) {
                    Object value = options.get(optionName);
                    lexerGrammarST.addAggr("options.{name,value}", optionName, value);
                }
            }
        }
        return lexerGrammarST.render();
    }

    public String getImplicitlyGeneratedLexerFileName() {
        return name+
               IGNORE_STRING_IN_GRAMMAR_FILE_NAME+
               LEXER_GRAMMAR_FILE_EXTENSION;
    }

    /** Get the name of the generated recognizer; may or may not be same
     *  as grammar name.
     *  Recognizer is TParser and TLexer from T if combined, else
     *  just use T regardless of grammar type.
     */
    public String getRecognizerName() {
        String suffix = "";
        List<Grammar> grammarsFromRootToMe = composite.getDelegators(this);
        //System.out.println("grammarsFromRootToMe="+grammarsFromRootToMe);
        String qualifiedName = name;
        if ( grammarsFromRootToMe!=null ) {
            StringBuilder buf = new StringBuilder();
            for (Grammar g : grammarsFromRootToMe) {
                buf.append(g.name);
                buf.append('_');
            }
            buf.append(name);
            qualifiedName = buf.toString();
        }
        if ( type==Grammar.COMBINED ||
             (type==Grammar.LEXER && implicitLexer) )
        {
            suffix = Grammar.grammarTypeToFileNameSuffix[type];
        }
        return qualifiedName+suffix;
    }
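
    // Naming examples for getRecognizerName() (illustrative): a combined
    // grammar "T" yields "TParser" (and its extracted implicit lexer yields
    // "TLexer"); a plain tree grammar "W" yields just "W". A delegate "S"
    // imported by root grammar "M" gets the qualified name "M_S" plus any
    // applicable suffix.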
    /** Parse a rule we add artificially that is a list of the other lexer
     *  rules like this: "Tokens : ID | INT | SEMI ;". nextToken() will invoke
     *  this rule to set the current token. Add char literals before
     *  the rule references.
     *
     *  If in filter mode, we want every alt to backtrack and we need to
     *  do k=1 to force the "first token def wins" rule. Otherwise, the
     *  longest-match rule comes into play with LL(*).
     *
     *  The ANTLRParser antlr.g file now invokes this when parsing a lexer
     *  grammar, which I think is proper even though it peeks at the info
     *  that later phases will (re)compute. It gets a list of lexer rules
     *  and builds a string representing the rule; then it creates a parser
     *  and adds the resulting tree to the grammar's tree.
     */
    public GrammarAST addArtificialMatchTokensRule(GrammarAST grammarAST,
                                                   List<String> ruleNames,
                                                   List<String> delegateNames,
                                                   boolean filterMode) {
        ST matchTokenRuleST;
        if ( filterMode ) {
            matchTokenRuleST = new ST(
                ARTIFICIAL_TOKENS_RULENAME+
                " options {k=1; backtrack=true;} : <rules; separator=\"|\">;");
        }
        else {
            matchTokenRuleST = new ST(
                ARTIFICIAL_TOKENS_RULENAME+" : <rules; separator=\"|\">;");
        }

        // Now add token rule references
        for (int i = 0; i < ruleNames.size(); i++) {
            String rname = ruleNames.get(i);
            matchTokenRuleST.add("rules", rname);
        }
        for (int i = 0; i < delegateNames.size(); i++) {
            String dname = delegateNames.get(i);
            matchTokenRuleST.add("rules", dname+".Tokens");
        }
        //System.out.println("tokens rule: "+matchTokenRuleST.toString());
        GrammarAST r = parseArtificialRule(matchTokenRuleST.render());
        addRule(grammarAST, r);
        //addRule((GrammarAST)parser.getAST());
        //return (GrammarAST)parser.getAST();
        return r;
    }
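
    // Concrete shape of the artificial rule (example, assuming lexer rules
    // ID and INT plus one delegate grammar named Sub): with filterMode the
    // rendered text is
    //
    //   Tokens options {k=1; backtrack=true;} : ID|INT|Sub.Tokens;
    //
    // and without filterMode simply "Tokens : ID|INT|Sub.Tokens;".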
    public GrammarAST parseArtificialRule(String ruleText) {
        ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText));
        ANTLRParser parser = ANTLRParser.createParser(new CommonTokenStream(lexer));
        parser.setGrammar(this);
        parser.setGrammarType(this.type);
        try {
            ANTLRParser.rule_return result = parser.rule();
            return result.getTree();
        }
        catch (Exception e) {
            ErrorManager.error(ErrorManager.MSG_ERROR_CREATING_ARTIFICIAL_RULE,
                               e);
            return null;
        }
    }

    public void addRule(GrammarAST grammarTree, GrammarAST t) {
        GrammarAST p = null;
        for (int i = 0; i < grammarTree.getChildCount(); i++ ) {
            p = (GrammarAST)grammarTree.getChild(i);
            if (p == null || p.getType() == ANTLRParser.RULE || p.getType() == ANTLRParser.PREC_RULE) {
                break;
            }
        }

        if (p != null) {
            grammarTree.addChild(t);
        }
    }

    /** For any syntactic predicates, we need to define rules for them; they will get
     *  defined automatically like any other rule. :)
     */
    protected List<? extends GrammarAST> getArtificialRulesForSyntacticPredicates(
        LinkedHashMap<String,GrammarAST> nameToSynpredASTMap)
    {
        List<GrammarAST> rules = new ArrayList<GrammarAST>();
        if ( nameToSynpredASTMap==null ) {
            return rules;
        }
        boolean isLexer = grammarTree.getType()==ANTLRParser.LEXER_GRAMMAR;
        for (Map.Entry<String, GrammarAST> entry : nameToSynpredASTMap.entrySet()) {
            String synpredName = entry.getKey();
            GrammarAST fragmentAST = entry.getValue();
            GrammarAST ruleAST =
                ANTLRParser.createSimpleRuleAST(synpredName,
                                                fragmentAST,
                                                isLexer);
            rules.add(ruleAST);
        }
        return rules;
    }

    public void addRulesForSyntacticPredicates() {
        // Get syn pred rules and add to existing tree
        List<? extends GrammarAST> synpredRules =
            getArtificialRulesForSyntacticPredicates(nameToSynpredASTMap);
        for (int i = 0; i < synpredRules.size(); i++) {
            GrammarAST rAST = (GrammarAST) synpredRules.get(i);
            grammarTree.addChild(rAST);
        }
    }

    /** Walk the list of options, altering this Grammar object according
     *  to any I recognize.
    protected void processOptions() {
        Iterator optionNames = options.keySet().iterator();
        while (optionNames.hasNext()) {
            String optionName = (String) optionNames.next();
            Object value = options.get(optionName);
            if ( optionName.equals("tokenVocab") ) {

            }
        }
    }
     */

    /** Define all the rule begin/end NFAStates to solve forward reference
     *  issues. Critical for composite grammars too.
     *  This is normally called on all root/delegates manually and then
     *  buildNFA() is called afterwards because the NFA construction needs
     *  to see rule start/stop states from potentially every grammar. These
     *  have to be created a priori. Testing routines will often
     *  just call buildNFA(), which forces a call to this method if not
     *  done already. Works ONLY for single noncomposite grammars.
     */
    public void createRuleStartAndStopNFAStates() {
        //System.out.println("### createRuleStartAndStopNFAStates "+getGrammarTypeString()+" grammar "+name+" NFAs");
        if ( nfa!=null ) {
            return;
        }
        nfa = new NFA(this);
        factory = new NFAFactory(nfa);

        Collection<Rule> rules = getRules();
        for (Rule r : rules) {
            String ruleName = r.name;
            NFAState ruleBeginState = factory.newState();
            ruleBeginState.setDescription("rule "+ruleName+" start");
            ruleBeginState.enclosingRule = r;
            r.startState = ruleBeginState;
            NFAState ruleEndState = factory.newState();
            ruleEndState.setDescription("rule "+ruleName+" end");
            ruleEndState.setAcceptState(true);
            ruleEndState.enclosingRule = r;
            r.stopState = ruleEndState;
        }
    }

    public void buildNFA() {
        if ( nfa==null ) {
            createRuleStartAndStopNFAStates();
        }
        if ( nfa.complete ) {
            // don't let it create more than once; has side-effects
            return;
        }
        //System.out.println("### build "+getGrammarTypeString()+" grammar "+name+" NFAs");
        if ( getRules().isEmpty() ) {
            return;
        }

        CommonTreeNodeStream input = new CommonTreeNodeStream(getGrammarTree());
        TreeToNFAConverter nfaBuilder = new TreeToNFAConverter(input, this, nfa, factory);
        try {
            nfaBuilder.grammar_();
        }
        catch (RecognitionException re) {
            ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
                               name,
                               re);
        }
        nfa.complete = true;
    }
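
    // Order of operations for composite grammars, sketched from the javadoc
    // above (illustrative; "root" and "delegate" are stand-in variables):
    //
    //   root.createRuleStartAndStopNFAStates();     // every grammar first,
    //   delegate.createRuleStartAndStopNFAStates(); // so cross-grammar rule
    //   root.buildNFA();                            // refs resolve, then the
    //   delegate.buildNFA();                        // actual NFA construction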
    /** For each decision in this grammar, compute a single DFA using the
     *  NFA states associated with the decision. The DFA construction
     *  determines whether or not the alternatives in the decision are
     *  separable using a regular lookahead language.
     *
     *  Store the lookahead DFAs in the AST created from the user's grammar
     *  so the code generator or whoever can easily access it.
     *
     *  This is a separate method because you might want to create a
     *  Grammar without doing the expensive analysis.
     */
    public void createLookaheadDFAs() {
        createLookaheadDFAs(true);
    }

    public void createLookaheadDFAs(boolean wackTempStructures) {
        if ( nfa==null ) {
            buildNFA();
        }

        // CHECK FOR LEFT RECURSION; make sure we can actually do analysis
        checkAllRulesForLeftRecursion();

        /*
        // was there a severe problem while sniffing the grammar?
        if ( ErrorManager.doNotAttemptAnalysis() ) {
            return;
        }
        */

        long start = System.currentTimeMillis();

        //System.out.println("### create DFAs");
        int numDecisions = getNumberOfDecisions();
        if ( NFAToDFAConverter.SINGLE_THREADED_NFA_CONVERSION ) {
            for (int decision=1; decision<=numDecisions; decision++) {
                NFAState decisionStartState = getDecisionNFAStartState(decision);
                if ( leftRecursiveRules.contains(decisionStartState.enclosingRule) ) {
                    // don't bother to process decisions within left recursive rules.
                    if ( composite.watchNFAConversion ) {
                        System.out.println("ignoring decision "+decision+
                                           " within left-recursive rule "+decisionStartState.enclosingRule.name);
                    }
                    continue;
                }
                if ( !externalAnalysisAbort && decisionStartState.getNumberOfTransitions()>1 ) {
                    Rule r = decisionStartState.enclosingRule;
                    if ( r.isSynPred && !synPredNamesUsedInDFA.contains(r.name) ) {
                        continue;
                    }
                    DFA dfa = null;
                    // if k=* or k=1, try LL(1)
                    if ( getUserMaxLookahead(decision)==0 ||
                         getUserMaxLookahead(decision)==1 )
                    {
                        dfa = createLL_1_LookaheadDFA(decision);
                    }
                    if ( dfa==null ) {
                        if ( composite.watchNFAConversion ) {
                            System.out.println("decision "+decision+
                                               " not suitable for LL(1)-optimized DFA analysis");
                        }
                        dfa = createLookaheadDFA(decision, wackTempStructures);
                    }
                    if ( dfa.startState==null ) {
                        // something went wrong; wipe out DFA
                        setLookaheadDFA(decision, null);
                    }
                    if ( Tool.internalOption_PrintDFA ) {
                        System.out.println("DFA d="+decision);
                        FASerializer serializer = new FASerializer(nfa.grammar);
                        String result = serializer.serialize(dfa.startState);
                        System.out.println(result);
                    }
                }
            }
        }
        else {
            ErrorManager.info("two-threaded DFA conversion");
            // create a barrier expecting the two DFA threads and this main creation thread
            Barrier barrier = new Barrier(3);
            // assume 2 CPUs for now
            int midpoint = numDecisions/2;
            NFAConversionThread t1 =
                new NFAConversionThread(this, barrier, 1, midpoint);
            new Thread(t1).start();
            if ( midpoint == (numDecisions/2) ) {
                midpoint++;
            }
            NFAConversionThread t2 =
                new NFAConversionThread(this, barrier, midpoint, numDecisions);
            new Thread(t2).start();
            // wait for these two threads to finish
            try {
                barrier.waitForRelease();
            }
            catch (InterruptedException e) {
                ErrorManager.internalError("what the hell? DFA interruptus", e);
            }
        }

        long stop = System.currentTimeMillis();
        DFACreationWallClockTimeInMS = stop - start;

        // indicate that we've finished building DFA (even if #decisions==0)
        allDecisionDFACreated = true;
    }
    public DFA createLL_1_LookaheadDFA(int decision) {
        Decision d = getDecision(decision);
        String enclosingRule = d.startState.enclosingRule.name;
        Rule r = d.startState.enclosingRule;
        NFAState decisionStartState = getDecisionNFAStartState(decision);

        if ( composite.watchNFAConversion ) {
            System.out.println("--------------------\nattempting LL(1) DFA (d="
                               +decisionStartState.getDecisionNumber()+") for "+
                               decisionStartState.getDescription());
        }

        if ( r.isSynPred && !synPredNamesUsedInDFA.contains(enclosingRule) ) {
            return null;
        }

        // compute lookahead for each alt
        int numAlts = getNumberOfAltsForDecisionNFA(decisionStartState);
        LookaheadSet[] altLook = new LookaheadSet[numAlts+1];
        for (int alt = 1; alt <= numAlts; alt++) {
            int walkAlt =
                decisionStartState.translateDisplayAltToWalkAlt(alt);
            NFAState altLeftEdge = getNFAStateForAltOfDecision(decisionStartState, walkAlt);
            NFAState altStartState = (NFAState)altLeftEdge.transition[0].target;
            //System.out.println("alt "+alt+" start state = "+altStartState.stateNumber);
            altLook[alt] = ll1Analyzer.LOOK(altStartState);
            //System.out.println("alt "+alt+": "+altLook[alt].toString(this));
        }

        // compare alt i with alt j for disjointness
        boolean decisionIsLL_1 = true;
        outer:
        for (int i = 1; i <= numAlts; i++) {
            for (int j = i+1; j <= numAlts; j++) {
                /*
                System.out.println("compare "+i+", "+j+": "+
                                   altLook[i].toString(this)+" with "+
                                   altLook[j].toString(this));
                */
                LookaheadSet collision = altLook[i].intersection(altLook[j]);
                if ( !collision.isNil() ) {
                    //System.out.println("collision (non-LL(1)): "+collision.toString(this));
                    decisionIsLL_1 = false;
                    break outer;
                }
            }
        }

        boolean foundConfoundingPredicate =
            ll1Analyzer.detectConfoundingPredicates(decisionStartState);
        if ( decisionIsLL_1 && !foundConfoundingPredicate ) {
            // build an LL(1) optimized DFA with edge for each altLook[i]
            if ( NFAToDFAConverter.debug ) {
                System.out.println("decision "+decision+" is simple LL(1)");
            }
            DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, altLook);
            setLookaheadDFA(decision, lookaheadDFA);
            updateLineColumnToLookaheadDFAMap(lookaheadDFA);
            return lookaheadDFA;
        }

        // not LL(1) but perhaps we can solve with simplified predicate search;
        // even if k=1 set manually, only resolve here if we have preds; i.e.,
        // don't resolve etc...

        /*
        SemanticContext visiblePredicates =
            ll1Analyzer.getPredicates(decisionStartState);
        boolean foundConfoundingPredicate =
            ll1Analyzer.detectConfoundingPredicates(decisionStartState);
        */

        // exit if not forced k=1 or we found a predicate situation we
        // can't handle: predicates in rules invoked from this decision.
        if ( getUserMaxLookahead(decision)!=1 || // not manually set to k=1
             !getAutoBacktrackMode(decision) ||
             foundConfoundingPredicate )
        {
            //System.out.println("trying LL(*)");
            return null;
        }

        List<IntervalSet> edges = new ArrayList<IntervalSet>();
        for (int i = 1; i < altLook.length; i++) {
            LookaheadSet s = altLook[i];
            edges.add(s.tokenTypeSet);
        }
        List<IntervalSet> disjoint = makeEdgeSetsDisjoint(edges);
        //System.out.println("disjoint="+disjoint);

        MultiMap<IntervalSet, Integer> edgeMap = new MultiMap<IntervalSet, Integer>();
        for (int i = 0; i < disjoint.size(); i++) {
            IntervalSet ds = disjoint.get(i);
            for (int alt = 1; alt < altLook.length; alt++) {
                LookaheadSet look = altLook[alt];
                if ( !ds.and(look.tokenTypeSet).isNil() ) {
                    edgeMap.map(ds, alt);
                }
            }
        }
        //System.out.println("edge map: "+edgeMap);

        // TODO: how do we know we covered stuff?

        // build an LL(1) optimized DFA with edge for each altLook[i]
        DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, edgeMap);
        setLookaheadDFA(decision, lookaheadDFA);

        // create map from line:col to decision DFA (for ANTLRWorks)
        updateLineColumnToLookaheadDFAMap(lookaheadDFA);

        return lookaheadDFA;
    }
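
    // Worked example of the disjointness test above (illustrative token
    // names): for "a : ID x | INT | ID y ;" the per-alt lookahead is
    // LOOK(1)={ID}, LOOK(2)={INT}, LOOK(3)={ID}; alts 1 and 3 collide on
    // ID, so decisionIsLL_1 becomes false and, absent the k=1/backtrack
    // escape hatch, the method returns null and full LL(*) analysis takes
    // over.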
    private void updateLineColumnToLookaheadDFAMap(DFA lookaheadDFA) {
        GrammarAST decisionAST = nfa.grammar.getDecisionBlockAST(lookaheadDFA.decisionNumber);
        int line = decisionAST.getLine();
        int col = decisionAST.getCharPositionInLine();
        lineColumnToLookaheadDFAMap.put(new StringBuffer().append(line).append(":")
                                        .append(col).toString(), lookaheadDFA);
    }

    protected List<IntervalSet> makeEdgeSetsDisjoint(List<IntervalSet> edges) {
        OrderedHashSet<IntervalSet> disjointSets = new OrderedHashSet<IntervalSet>();
        // walk each incoming edge label/set and add to disjoint set
        int numEdges = edges.size();
        for (int e = 0; e < numEdges; e++) {
            IntervalSet t = edges.get(e);
            if ( disjointSets.contains(t) ) { // exact set present
                continue;
            }

            // compare t with set i for disjointness
            IntervalSet remainder = t; // remainder starts out as whole set to add
            int numDisjointElements = disjointSets.size();
            for (int i = 0; i < numDisjointElements; i++) {
                IntervalSet s_i = disjointSets.get(i);

                if ( t.and(s_i).isNil() ) { // nothing in common
                    continue;
                }
                //System.out.println(label+" collides with "+rl);

                // For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
                // (ignoring s_i-t if nil; don't put in list)

                // Replace existing s_i with intersection since we
                // know that will always be a non nil character class
                IntervalSet intersection = s_i.and(t);
                disjointSets.set(i, intersection);

                // Compute s_i-t to see what is in current set and not in incoming
                IntervalSet existingMinusNewElements = s_i.subtract(t);
                //System.out.println(s_i+"-"+t+"="+existingMinusNewElements);
                if ( !existingMinusNewElements.isNil() ) {
                    // found a new character class; add to the end (doesn't affect
                    // outer loop duration since n is computed a priori)
                    disjointSets.add(existingMinusNewElements);
                }

                // anything left to add to the reachableLabels?
                remainder = t.subtract(s_i);
                if ( remainder.isNil() ) {
                    break; // nothing left to add to set. done!
                }

                t = remainder;
            }
            if ( !remainder.isNil() ) {
                disjointSets.add(remainder);
            }
        }
        return disjointSets.elements();
    }
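
    // Trace of makeEdgeSetsDisjoint on a small input (hand-computed
    // example): edges = [{a,b}, {b,c}].
    //   e=0: disjointSets is empty, so remainder {a,b} is appended -> [{a,b}]
    //   e=1: t={b,c} vs s_0={a,b}: s_0 becomes the intersection {b};
    //        s_0 - t = {a} is appended; remainder = t - s_0 = {c} survives
    //        the loop and is appended -> [{b}, {a}, {c}]
    // Result: three pairwise-disjoint sets covering {a,b,c}.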
    public DFA createLookaheadDFA(int decision, boolean wackTempStructures) {
        Decision d = getDecision(decision);
        String enclosingRule = d.startState.enclosingRule.name;
        Rule r = d.startState.enclosingRule;

        //System.out.println("createLookaheadDFA(): "+enclosingRule+" dec "+decision+"; synprednames prev used "+synPredNamesUsedInDFA);
        NFAState decisionStartState = getDecisionNFAStartState(decision);
        long startDFA=0, stopDFA;
        if ( composite.watchNFAConversion ) {
            System.out.println("--------------------\nbuilding lookahead DFA (d="
                               +decisionStartState.getDecisionNumber()+") for "+
                               decisionStartState.getDescription());
            startDFA = System.currentTimeMillis();
        }

        DFA lookaheadDFA = new DFA(decision, decisionStartState);
        // Retry to create a simpler DFA if analysis failed (non-LL(*),
        // recursion overflow, or timeout).
        boolean failed =
            lookaheadDFA.probe.isNonLLStarDecision() ||
            lookaheadDFA.probe.analysisOverflowed();
        if ( failed && lookaheadDFA.okToRetryDFAWithK1() ) {
            // set k=1 option and try again.
            // First, clean up tracking stuff
            decisionsWhoseDFAsUsesSynPreds.remove(lookaheadDFA);
            // TODO: clean up synPredNamesUsedInDFA also (harder)
            d.blockAST.setBlockOption(this, "k", Utils.integer(1));
            if ( composite.watchNFAConversion ) {
                System.out.print("trying decision "+decision+
                                 " again with k=1; reason: "+
                                 lookaheadDFA.getReasonForFailure());
            }
            lookaheadDFA = null; // make sure other memory is "free" before redoing
            lookaheadDFA = new DFA(decision, decisionStartState);
        }

        setLookaheadDFA(decision, lookaheadDFA);

        if ( wackTempStructures ) {
            for (DFAState s : lookaheadDFA.getUniqueStates().values()) {
                s.reset();
            }
        }

        // create map from line:col to decision DFA (for ANTLRWorks)
        updateLineColumnToLookaheadDFAMap(lookaheadDFA);

        if ( composite.watchNFAConversion ) {
            stopDFA = System.currentTimeMillis();
            System.out.println("cost: "+lookaheadDFA.getNumberOfStates()+
                               " states, "+(int)(stopDFA-startDFA)+" ms");
        }
        //System.out.println("after create DFA; synPredNamesUsedInDFA="+synPredNamesUsedInDFA);
        return lookaheadDFA;
    }

    /** Terminate DFA creation (grammar analysis).
     */
    public void externallyAbortNFAToDFAConversion() {
        externalAnalysisAbort = true;
    }

    public boolean NFAToDFAConversionExternallyAborted() {
        return externalAnalysisAbort;
    }

    /** Return a new unique integer in the token type space */
    public int getNewTokenType() {
        composite.maxTokenType++;
        return composite.maxTokenType;
    }

    /** Define a token at a particular token type value. Blast an
     *  old value with a new one. This is called during normal grammar processing
     *  and during import vocab operations to set tokens with specific values.
     */
    public void defineToken(String text, int tokenType) {
        //System.out.println("defineToken("+text+", "+tokenType+")");
        if ( composite.tokenIDToTypeMap.get(text)!=null ) {
            // already defined? Must be predefined one like EOF;
            // do nothing
            return;
        }
        // the index in the typeToTokenList table is actually shifted to
        // hold faux labels as you cannot have negative indices.
        if ( text.charAt(0)=='\'' ) {
            composite.stringLiteralToTypeMap.put(text, Utils.integer(tokenType));
            // track in reverse index too
            if ( tokenType>=composite.typeToStringLiteralList.size() ) {
                composite.typeToStringLiteralList.setSize(tokenType+1);
            }
            composite.typeToStringLiteralList.set(tokenType, text);
        }
        else { // must be a label like ID
            composite.tokenIDToTypeMap.put(text, Utils.integer(tokenType));
        }
        int index = Label.NUM_FAUX_LABELS+tokenType-1;
        //System.out.println("defining "+name+" token "+text+" at type="+tokenType+", index="+index);
        composite.maxTokenType = Math.max(composite.maxTokenType, tokenType);
        if ( index>=composite.typeToTokenList.size() ) {
            composite.typeToTokenList.setSize(index+1);
        }
        String prevToken = composite.typeToTokenList.get(index);
        if ( prevToken==null || prevToken.charAt(0)=='\'' ) {
            // only record if nothing there before or if thing before was a literal
            composite.typeToTokenList.set(index, text);
        }
    }
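
    // Index arithmetic example for defineToken (hand-worked; the faux
    // labels are the runtime sentinel labels occupying the low slots of
    // typeToTokenList): with Label.NUM_FAUX_LABELS faux entries, a token of
    // type 5 lands at
    //
    //   int index = Label.NUM_FAUX_LABELS + 5 - 1;
    //
    // so real token types stay 1-based while the list itself remains
    // 0-based with no negative indices.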
    /** Define a new rule. A new rule index is created by incrementing
     *  ruleIndex.
     */
    public void defineRule(Token ruleToken,
                           String modifier,
                           Map<String, Object> options,
                           GrammarAST tree,
                           GrammarAST argActionAST,
                           int numAlts)
    {
        String ruleName = ruleToken.getText();
        if ( getLocallyDefinedRule(ruleName)!=null ) {
            ErrorManager.grammarError(ErrorManager.MSG_RULE_REDEFINITION,
                                      this, ruleToken, ruleName);
            return;
        }

        if ( (type==Grammar.PARSER||type==Grammar.TREE_PARSER) &&
             Character.isUpperCase(ruleName.charAt(0)) )
        {
            ErrorManager.grammarError(ErrorManager.MSG_LEXER_RULES_NOT_ALLOWED,
                                      this, ruleToken, ruleName);
            return;
        }

        Rule r = new Rule(this, ruleName, composite.ruleIndex, numAlts);
        /*
        System.out.println("defineRule("+ruleName+",modifier="+modifier+
                           "): index="+r.index+", nalts="+numAlts);
        */
        r.modifier = modifier;
        nameToRuleMap.put(ruleName, r);
        setRuleAST(ruleName, tree);
        r.setOptions(options, ruleToken);
        r.argActionAST = argActionAST;
        composite.ruleIndexToRuleList.setSize(composite.ruleIndex+1);
        composite.ruleIndexToRuleList.set(composite.ruleIndex, r);
        composite.ruleIndex++;
        if ( ruleName.startsWith(SYNPRED_RULE_PREFIX) ) {
            r.isSynPred = true;
        }
    }
    /** Define a new predicate and get back its name for use in building
     *  a semantic predicate reference to the syn pred.
     */
    public String defineSyntacticPredicate(GrammarAST blockAST,
                                           String currentRuleName)
    {
        if ( nameToSynpredASTMap==null ) {
            nameToSynpredASTMap = new LinkedHashMap<String, GrammarAST>();
        }
        String predName =
            SYNPRED_RULE_PREFIX+(nameToSynpredASTMap.size() + 1)+"_"+name;
        blockAST.setTreeEnclosingRuleNameDeeply(predName);
        nameToSynpredASTMap.put(predName, blockAST);
        return predName;
    }
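
    // Naming example: in a grammar named "T", the first syntactic predicate
    // defined gets predName "synpred1_T", the second "synpred2_T", and so
    // on; these become the faux rule names added by
    // addRulesForSyntacticPredicates().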
	public void setSynPredGateIfNotAlready(ST gateST) {
		String scope = getDefaultActionScope(type);
		Map<String, Object> actionsForGrammarScope = getActions().get(scope);
		// if no synpredgate action set by user then set
		if ( (actionsForGrammarScope==null ||
			  !actionsForGrammarScope.containsKey(Grammar.SYNPREDGATE_ACTION_NAME)) )
		{
			if ( actionsForGrammarScope==null ) {
				actionsForGrammarScope=new HashMap<String, Object>();
				getActions().put(scope, actionsForGrammarScope);
			}
			actionsForGrammarScope.put(Grammar.SYNPREDGATE_ACTION_NAME,
									   gateST);
		}
	}

	public Map<String, Map<String, Object>> getActions() {
		return actions;
	}

	/** Given a grammar type, what should be the default action scope?
	 *  If I say @members in a COMBINED grammar, for example, the
	 *  default scope should be "parser".
	 */
	public String getDefaultActionScope(int grammarType) {
		switch (grammarType) {
			case Grammar.LEXER :
				return "lexer";
			case Grammar.PARSER :
			case Grammar.COMBINED :
				return "parser";
			case Grammar.TREE_PARSER :
				return "treeparser";
		}
		return null;
	}

	public void defineLexerRuleFoundInParser(Token ruleToken,
											 GrammarAST ruleAST)
	{
//		System.out.println("rule tree is:\n"+ruleAST.toStringTree());
		/*
		String ruleText = tokenBuffer.toOriginalString(ruleAST.ruleStartTokenIndex,
													   ruleAST.ruleStopTokenIndex);
		*/
		// first, create the text of the rule
		StringBuilder buf = new StringBuilder();
		buf.append("// $ANTLR src \"");
		buf.append(getFileName());
		buf.append("\" ");
		buf.append(ruleAST.getLine());
		buf.append("\n");
		for (int i=ruleAST.getTokenStartIndex();
			 i<=ruleAST.getTokenStopIndex() && i<tokenBuffer.size();
			 i++)
		{
			CommonToken t = (CommonToken)tokenBuffer.get(i);
			// undo the text deletions done by the lexer (ugh)
			if ( t.getType()==ANTLRParser.BLOCK ) {
				buf.append("(");
			}
			else if ( t.getType()==ANTLRParser.ACTION ) {
				buf.append("{");
				buf.append(t.getText());
				buf.append("}");
			}
			else if ( t.getType()==ANTLRParser.SEMPRED ||
					  t.getType()==ANTLRParser.SYN_SEMPRED ||
					  t.getType()==ANTLRParser.GATED_SEMPRED ||
					  t.getType()==ANTLRParser.BACKTRACK_SEMPRED )
			{
				buf.append("{");
				buf.append(t.getText());
				buf.append("}?");
			}
			else if ( t.getType()==ANTLRParser.ARG_ACTION ) {
				buf.append("[");
				buf.append(t.getText());
				buf.append("]");
			}
			else {
				buf.append(t.getText());
			}
		}
		String ruleText = buf.toString();
		//System.out.println("[["+ruleText+"]]");
		// now put the rule into the lexer grammar template
		if ( getGrammarIsRoot() ) { // don't build lexers for delegates
			lexerGrammarST.add("rules", ruleText);
		}
		// track this lexer rule's name
		composite.lexerRules.add(ruleToken.getText());
	}
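	/* The reconstructed rule text starts with a source marker, e.g. with a
	 * hypothetical file and line:
	 *
	 *   // $ANTLR src "T.g" 12
	 *   ID : 'a'..'z'+ ;
	 *
	 * which is presumably how the generated lexer grammar stays traceable
	 * back to the combined grammar file.
	 */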
	/** If someone does PLUS='+' in the parser, must make sure we get
	 *  "PLUS : '+' ;" in lexer not "T73 : '+';"
	 */
	public void defineLexerRuleForAliasedStringLiteral(String tokenID,
													   String literal,
													   int tokenType)
	{
		if ( getGrammarIsRoot() ) { // don't build lexers for delegates
			//System.out.println("defineLexerRuleForAliasedStringLiteral: "+literal+" "+tokenType);
			lexerGrammarST.addAggr("literals.{ruleName,type,literal}",
								   tokenID,
								   Utils.integer(tokenType),
								   literal);
		}
		// track this lexer rule's name
		composite.lexerRules.add(tokenID);
	}

	public void defineLexerRuleForStringLiteral(String literal, int tokenType) {
		//System.out.println("defineLexerRuleForStringLiteral: "+literal+" "+tokenType);
		// compute new token name like T237 and define it as having tokenType
		String tokenID = computeTokenNameFromLiteral(tokenType,literal);
		defineToken(tokenID, tokenType);
		// tell implicit lexer to define a rule to match the literal
		if ( getGrammarIsRoot() ) { // don't build lexers for delegates
			lexerGrammarST.addAggr("literals.{ruleName,type,literal}",
								   tokenID,
								   Utils.integer(tokenType),
								   literal);
		}
	}
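	/* Net effect of the two methods above (hypothetical type number): an
	 * aliased literal from "tokens { PLUS='+'; }" becomes the named rule
	 * "PLUS : '+' ;" in the implicit lexer, while a bare '+' in a parser
	 * rule gets an auto-generated rule such as "T73 : '+' ;".
	 */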
	public Rule getLocallyDefinedRule(String ruleName) {
		Rule r = nameToRuleMap.get(ruleName);
		return r;
	}

	public Rule getRule(String ruleName) {
		Rule r = composite.getRule(ruleName);
		/*
		if ( r!=null && r.grammar != this ) {
			System.out.println(name+".getRule("+ruleName+")="+r);
		}
		*/
		return r;
	}

	public Rule getRule(String scopeName, String ruleName) {
		if ( scopeName!=null ) { // scope override
			Grammar scope = composite.getGrammar(scopeName);
			if ( scope==null ) {
				return null;
			}
			return scope.getLocallyDefinedRule(ruleName);
		}
		return getRule(ruleName);
	}

	public int getRuleIndex(String scopeName, String ruleName) {
		Rule r = getRule(scopeName, ruleName);
		if ( r!=null ) {
			return r.index;
		}
		return INVALID_RULE_INDEX;
	}

	public int getRuleIndex(String ruleName) {
		return getRuleIndex(null, ruleName);
	}

	public String getRuleName(int ruleIndex) {
		Rule r = composite.ruleIndexToRuleList.get(ruleIndex);
		if ( r!=null ) {
			return r.name;
		}
		return null;
	}

	/** Should codegen.g gen rule for ruleName?
	 *  If synpred, only gen if used in a DFA.
	 *  If regular rule, only gen if not overridden in delegator.
	 *  Always gen Tokens rule though.
	 */
	public boolean generateMethodForRule(String ruleName) {
		if ( ruleName.equals(ARTIFICIAL_TOKENS_RULENAME) ) {
			// always generate Tokens rule to satisfy lexer interface,
			// but it may have no alternatives.
			return true;
		}
		if ( overriddenRules.contains(ruleName) ) {
			// don't generate any overridden rules
			return false;
		}
		// generate if non-synpred or synpred used in a DFA
		Rule r = getLocallyDefinedRule(ruleName);
		return !r.isSynPred ||
			   (r.isSynPred && synPredNamesUsedInDFA.contains(ruleName));
	}

	public AttributeScope defineGlobalScope(String name, Token scopeAction) {
		AttributeScope scope = new AttributeScope(this, name, scopeAction);
		scopes.put(name,scope);
		return scope;
	}

	public AttributeScope createReturnScope(String ruleName, Token retAction) {
		AttributeScope scope = new AttributeScope(this, ruleName, retAction);
		scope.isReturnScope = true;
		return scope;
	}

	public AttributeScope createRuleScope(String ruleName, Token scopeAction) {
		AttributeScope scope = new AttributeScope(this, ruleName, scopeAction);
		scope.isDynamicRuleScope = true;
		return scope;
	}

	public AttributeScope createParameterScope(String ruleName, Token argAction) {
		AttributeScope scope = new AttributeScope(this, ruleName, argAction);
		scope.isParameterScope = true;
		return scope;
	}

	/** Get a global scope */
	public AttributeScope getGlobalScope(String name) {
		return scopes.get(name);
	}

	public Map<String, AttributeScope> getGlobalScopes() {
		return scopes;
	}

	/** Define a label defined in a rule r; check the validity then ask the
	 *  Rule object to actually define it.
	 */
	protected void defineLabel(Rule r, Token label, GrammarAST element, int type) {
		boolean err = nameSpaceChecker.checkForLabelTypeMismatch(r, label, type);
		if ( err ) {
			return;
		}
		r.defineLabel(label, element, type);
	}

	public void defineTokenRefLabel(String ruleName,
									Token label,
									GrammarAST tokenRef)
	{
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r!=null ) {
			if ( type==LEXER &&
				 (tokenRef.getType()==ANTLRParser.CHAR_LITERAL||
				  tokenRef.getType()==ANTLRParser.BLOCK||
				  tokenRef.getType()==ANTLRParser.NOT||
				  tokenRef.getType()==ANTLRParser.CHAR_RANGE||
				  tokenRef.getType()==ANTLRParser.WILDCARD))
			{
				defineLabel(r, label, tokenRef, CHAR_LABEL);
			}
			else {
				defineLabel(r, label, tokenRef, TOKEN_LABEL);
			}
		}
	}

	public void defineWildcardTreeLabel(String ruleName,
										Token label,
										GrammarAST tokenRef)
	{
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r!=null ) {
			defineLabel(r, label, tokenRef, WILDCARD_TREE_LABEL);
		}
	}

	public void defineWildcardTreeListLabel(String ruleName,
											Token label,
											GrammarAST tokenRef)
	{
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r!=null ) {
			defineLabel(r, label, tokenRef, WILDCARD_TREE_LIST_LABEL);
		}
	}

	public void defineRuleRefLabel(String ruleName,
								   Token label,
								   GrammarAST ruleRef)
	{
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r!=null ) {
			defineLabel(r, label, ruleRef, RULE_LABEL);
		}
	}

	public void defineTokenListLabel(String ruleName,
									 Token label,
									 GrammarAST element)
	{
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r!=null ) {
			defineLabel(r, label, element, TOKEN_LIST_LABEL);
		}
	}

	public void defineRuleListLabel(String ruleName,
									Token label,
									GrammarAST element)
	{
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r!=null ) {
			if ( !r.getHasMultipleReturnValues() ) {
				ErrorManager.grammarError(
					ErrorManager.MSG_LIST_LABEL_INVALID_UNLESS_RETVAL_STRUCT,this,
					label,label.getText());
			}
			defineLabel(r, label, element, RULE_LIST_LABEL);
		}
	}
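	/* E.g., walking a parser rule "r : x=ID ids+=ID e=expr ;" triggers
	 * (label tokens and element ASTs shown symbolically):
	 *
	 *   defineTokenRefLabel("r", x, <ID>);     // TOKEN_LABEL
	 *   defineTokenListLabel("r", ids, <ID>);  // TOKEN_LIST_LABEL
	 *   defineRuleRefLabel("r", e, <expr>);    // RULE_LABEL
	 */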
	/** Given a set of all rewrite elements on right of ->, filter for
	 *  label types such as Grammar.TOKEN_LABEL, Grammar.TOKEN_LIST_LABEL, ...
	 *  Return a displayable token type name computed from the GrammarAST.
	 */
	public Set<String> getLabels(Set<GrammarAST> rewriteElements, int labelType) {
		Set<String> labels = new HashSet<String>();
		for (GrammarAST el : rewriteElements) {
			if ( el.getType()==ANTLRParser.LABEL ) {
				String labelName = el.getText();
				Rule enclosingRule = getLocallyDefinedRule(el.enclosingRuleName);
				if ( enclosingRule==null ) continue;
				LabelElementPair pair = enclosingRule.getLabel(labelName);
				/*
				// if tree grammar and we have a wildcard, only notice it
				// when looking for rule labels not token label. x=. should
				// look like a rule ref since could be subtree.
				if ( type==TREE_PARSER && pair!=null &&
					 pair.elementRef.getType()==ANTLRParser.WILDCARD )
				{
					if ( labelType==WILDCARD_TREE_LABEL ) {
						labels.add(labelName);
						continue;
					}
					else continue;
				}
				*/
				// if valid label and type is what we're looking for
				// and not ref to old value val $rule, add to list
				if ( pair!=null && pair.type==labelType &&
					 !labelName.equals(el.enclosingRuleName) )
				{
					labels.add(labelName);
				}
			}
		}
		return labels;
	}

	/** Before generating code, we examine all actions that can have
	 *  $x.y and $y stuff in them because some code generation depends on
	 *  Rule.referencedPredefinedRuleAttributes.  I need to remove unused
	 *  rule labels for example.
	 */
	protected void examineAllExecutableActions() {
		Collection<Rule> rules = getRules();
		for (Rule r : rules) {
			// walk all actions within the rule elements, args, and exceptions
			List<GrammarAST> actions = r.getInlineActions();
			for (int i = 0; i < actions.size(); i++) {
				GrammarAST actionAST = actions.get(i);
				ActionAnalysis sniffer =
					new ActionAnalysis(this, r.name, actionAST);
				sniffer.analyze();
			}
			// walk any named actions like @init, @after
			Collection<? extends Object> namedActions = r.getActions().values();
			for (Object namedAction : namedActions) {
				GrammarAST actionAST = (GrammarAST)namedAction;
				ActionAnalysis sniffer =
					new ActionAnalysis(this, r.name, actionAST);
				sniffer.analyze();
			}
		}
	}

	/** Remove all labels on rule refs whose target rules have no return value.
	 *  Do this for all rules in grammar.
	 */
	public void checkAllRulesForUselessLabels() {
		if ( type==LEXER ) {
			return;
		}
		Set<String> rules = nameToRuleMap.keySet();
		for (String ruleName : rules) {
			Rule r = getRule(ruleName);
			removeUselessLabels(r.getRuleLabels());
			removeUselessLabels(r.getRuleListLabels());
		}
	}
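	/* E.g., in "a : x=b ;" where rule b has no return value and no action
	 * mentions $x, the pair for x fails the test below and is removed.
	 */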
	/** A label on a rule is useless if the rule has no return value, no
	 *  tree or template output, and it is not referenced in an action.
	 */
	protected void removeUselessLabels(Map<String, LabelElementPair> ruleToElementLabelPairMap) {
		if ( ruleToElementLabelPairMap==null ) {
			return;
		}
		Collection<LabelElementPair> labels = ruleToElementLabelPairMap.values();
		List<String> kill = new ArrayList<String>();
		for (LabelElementPair pair : labels) {
			Rule refdRule = getRule(pair.elementRef.getText());
			if ( refdRule!=null && !refdRule.getHasReturnValue() && !pair.actionReferencesLabel ) {
				//System.out.println(pair.label.getText()+" is useless");
				kill.add(pair.label.getText());
			}
		}
		for (int i = 0; i < kill.size(); i++) {
			String labelToKill = kill.get(i);
			// System.out.println("kill "+labelToKill);
			ruleToElementLabelPairMap.remove(labelToKill);
		}
	}

	/** Track a rule reference within an outermost alt of a rule.  Used
	 *  at the moment to decide if $ruleref refers to a unique rule ref in
	 *  the alt.  Rewrite rules force tracking of all rule AST results.
	 *
	 *  This data is also used to verify that all rules have been defined.
	 */
	public void altReferencesRule(String enclosingRuleName,
								  GrammarAST refScopeAST,
								  GrammarAST refAST,
								  int outerAltNum)
	{
		/* Do nothing for now; not sure need; track S.x as x
		String scope = null;
		Grammar scopeG = null;
		if ( refScopeAST!=null ) {
			if ( !scopedRuleRefs.contains(refScopeAST) ) {
				scopedRuleRefs.add(refScopeAST);
			}
			scope = refScopeAST.getText();
		}
		*/
		Rule r = getRule(enclosingRuleName);
		if ( r==null ) {
			return; // no error here; see NameSpaceChecker
		}
		r.trackRuleReferenceInAlt(refAST, outerAltNum);
		Token refToken = refAST.getToken();
		if ( !ruleRefs.contains(refAST) ) {
			ruleRefs.add(refAST);
		}
	}

	/** Track a token reference within an outermost alt of a rule.  Used
	 *  to decide if $tokenref refers to a unique token ref in
	 *  the alt.  Does not track literals!
	 *
	 *  Rewrite rules force tracking of all tokens.
	 */
	public void altReferencesTokenID(String ruleName, GrammarAST refAST, int outerAltNum) {
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r==null ) {
			return;
		}
		r.trackTokenReferenceInAlt(refAST, outerAltNum);
		if ( !tokenIDRefs.contains(refAST.getToken()) ) {
			tokenIDRefs.add(refAST.getToken());
		}
	}

	/** To yield smaller, more readable code, track which rules have their
	 *  predefined attributes accessed.  If the rule has no user-defined
	 *  return values, then don't generate the return value scope classes
	 *  etc...  Make the rule have void return value.  Don't track for lexer
	 *  rules.
	 */
	public void referenceRuleLabelPredefinedAttribute(String ruleName) {
		Rule r = getRule(ruleName);
		if ( r!=null && type!=LEXER ) {
			// indicate that an action ref'd an attr unless it's in a lexer
			// so that $ID.text refs don't force lexer rules to define
			// return values...Token objects are created by the caller instead.
			r.referencedPredefinedRuleAttributes = true;
		}
	}
	public List<? extends Collection<? extends Rule>> checkAllRulesForLeftRecursion() {
		return sanity.checkAllRulesForLeftRecursion();
	}

	/** Return a list of left-recursive rules; no analysis can be done
	 *  successfully on these.  Useful to skip these rules then and also
	 *  for ANTLRWorks to highlight them.
	 */
	public Set<Rule> getLeftRecursiveRules() {
		if ( nfa==null ) {
			buildNFA();
		}
		if ( leftRecursiveRules!=null ) {
			return leftRecursiveRules;
		}
		sanity.checkAllRulesForLeftRecursion();
		return leftRecursiveRules;
	}

	public void checkRuleReference(GrammarAST scopeAST,
								   GrammarAST refAST,
								   GrammarAST argsAST,
								   String currentRuleName)
	{
		sanity.checkRuleReference(scopeAST, refAST, argsAST, currentRuleName);
	}

	/** Rules like "a : ;" and "a : {...} ;" should not generate
	 *  try/catch blocks for RecognitionException.  To detect this
	 *  it's probably ok to just look for any reference to an atom
	 *  that can match some input.  W/o that, the rule is unlikely to have
	 *  anything else.
	 */
	public boolean isEmptyRule(GrammarAST block) {
		BitSet nonEmptyTerminals = new BitSet();
		nonEmptyTerminals.set(ANTLRParser.TOKEN_REF);
		nonEmptyTerminals.set(ANTLRParser.STRING_LITERAL);
		nonEmptyTerminals.set(ANTLRParser.CHAR_LITERAL);
		nonEmptyTerminals.set(ANTLRParser.WILDCARD);
		nonEmptyTerminals.set(ANTLRParser.RULE_REF);
		return findFirstTypeOutsideRewrite(block, nonEmptyTerminals) == null;
	}
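	/* E.g., "a : ;" and "a : {...} ;" count as empty (no atom outside a
	 * rewrite), while "a : A ;" does not: findFirstTypeOutsideRewrite()
	 * below finds the TOKEN_REF for A.
	 */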
	protected GrammarAST findFirstTypeOutsideRewrite(GrammarAST block, BitSet types) {
		ArrayList<GrammarAST> worklist = new ArrayList<GrammarAST>();
		worklist.add(block);
		while (!worklist.isEmpty()) {
			GrammarAST current = worklist.remove(worklist.size() - 1);
			if (current.getType() == ANTLRParser.REWRITE) {
				continue;
			}

			if (current.getType() >= 0 && types.get(current.getType())) {
				return current;
			}

			worklist.addAll(Arrays.asList(current.getChildrenAsArray()));
		}

		return null;
	}

	public boolean isAtomTokenType(int ttype) {
		return ttype == ANTLRParser.WILDCARD||
			   ttype == ANTLRParser.CHAR_LITERAL||
			   ttype == ANTLRParser.CHAR_RANGE||
			   ttype == ANTLRParser.STRING_LITERAL||
			   ttype == ANTLRParser.NOT||
			   (type != LEXER && ttype == ANTLRParser.TOKEN_REF);
	}

	public int getTokenType(String tokenName) {
		Integer I;
		if ( tokenName.charAt(0)=='\'') {
			I = composite.stringLiteralToTypeMap.get(tokenName);
		}
		else { // must be a label like ID
			I = composite.tokenIDToTypeMap.get(tokenName);
		}
		int i = (I!=null)? I : Label.INVALID;
		//System.out.println("grammar type "+type+" "+tokenName+"->"+i);
		return i;
	}

	/** Get the list of tokens that are IDs like BLOCK and LPAREN */
	public Set<String> getTokenIDs() {
		return composite.tokenIDToTypeMap.keySet();
	}

	/** Return an ordered integer list of token types that have no
	 *  corresponding token ID like INT or KEYWORD_BEGIN; for stuff
	 *  like 'begin'.
	 */
	public Collection<Integer> getTokenTypesWithoutID() {
		List<Integer> types = new ArrayList<Integer>();
		for (int t=Label.MIN_TOKEN_TYPE; t<=getMaxTokenType(); t++) {
			String name = getTokenDisplayName(t);
			if ( name.charAt(0)=='\'' ) {
				types.add(Utils.integer(t));
			}
		}
		return types;
	}

	/** Get a list of all token IDs and literals that have an associated
	 *  token type.
	 */
	public Set<String> getTokenDisplayNames() {
		Set<String> names = new HashSet<String>();
		for (int t=Label.MIN_TOKEN_TYPE; t<=getMaxTokenType(); t++) {
			names.add(getTokenDisplayName(t));
		}
		return names;
	}

	/** Given a literal like (the 3 char sequence with single quotes) 'a',
	 *  return the int value of 'a'.  Convert escape sequences here also.
	 *  ANTLR's antlr.g parser does not convert escape sequences.
	 *
	 *  11/26/2005: I changed literals to always be '...' even for strings.
	 *  This routine still works though.
	 */
	public static int getCharValueFromGrammarCharLiteral(String literal) {
		switch ( literal.length() ) {
			case 3 :
				// 'x'
				return literal.charAt(1); // no escape char
			case 4 :
				// '\x'  (antlr lexer will catch invalid char)
				if ( Character.isDigit(literal.charAt(2)) ) {
					ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
									   "invalid char literal: "+literal);
					return -1;
				}
				int escChar = literal.charAt(2);
				int charVal = ANTLRLiteralEscapedCharValue[escChar];
				if ( charVal==0 ) {
					// Unnecessary escapes like '\{' should just yield {
					return escChar;
				}
				return charVal;
			case 8 :
				// '\u1234'
				String unicodeChars = literal.substring(3,literal.length()-1);
				return Integer.parseInt(unicodeChars, 16);
			default :
				ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
								   "invalid char literal: "+literal);
				return -1;
		}
	}
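	/* Examples, one per case of the switch above:
	 *
	 *   getCharValueFromGrammarCharLiteral("'a'")       == 'a'
	 *   getCharValueFromGrammarCharLiteral("'\\n'")     == '\n'
	 *   getCharValueFromGrammarCharLiteral("'\\u0041'") == 0x41
	 */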
	/** ANTLR does not convert escape sequences during the parse phase because
	 *  it could not know how to print String/char literals back out when
	 *  printing grammars etc...  Someone in China might use the real unicode
	 *  char in a literal as it will display on their screen; when printing
	 *  back out, I could not know whether to display or use a unicode escape.
	 *
	 *  This routine converts a string literal with possible escape sequences
	 *  into a pure string of 16-bit char values.  Escapes and unicode \u0000
	 *  specs are converted to pure chars.  return in a buffer; people may
	 *  want to walk/manipulate further.
	 *
	 *  The NFA construction routine must know the actual char values.
	 */
	public static StringBuffer getUnescapedStringFromGrammarStringLiteral(String literal) {
		//System.out.println("escape: ["+literal+"]");
		StringBuffer buf = new StringBuffer();
		int last = literal.length()-1; // skip quotes on outside
		for (int i=1; i<last; i++) {
			char c = literal.charAt(i);
			if ( c=='\\' ) {
				i++;
				c = literal.charAt(i);
				if ( Character.toUpperCase(c)=='U' ) {
					// \u0000
					i++;
					String unicodeChars = literal.substring(i,i+4);
					// parse the unicode 16 bit hex value
					int val = Integer.parseInt(unicodeChars, 16);
					i+=4-1; // loop will inc by 1; only jump 3 then
					buf.append((char)val);
				}
				else if ( Character.isDigit(c) ) {
					ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
									   "invalid char literal: "+literal);
					buf.append("\\").append(c);
				}
				else {
					buf.append((char)ANTLRLiteralEscapedCharValue[c]); // normal \x escape
				}
			}
			else {
				buf.append(c); // simple char x
			}
		}
		//System.out.println("string: ["+buf.toString()+"]");
		return buf;
	}
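	/* E.g., the grammar literal 'a\nb\u0041' (quotes included) unescapes
	 * to the four chars: a, newline, b, A.
	 */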
	/** Pull your token definitions from an existing grammar in memory.
	 *  You must use Grammar() ctor then this method then setGrammarContent()
	 *  to make this work.  This was useful primarily for testing and
	 *  interpreting grammars until I added import grammar functionality.
	 *  When you import a grammar you implicitly import its vocabulary as well
	 *  and keep the same token type values.
	 *
	 *  Returns the max token type found.
	 */
	public int importTokenVocabulary(Grammar importFromGr) {
		Set<String> importedTokenIDs = importFromGr.getTokenIDs();
		for (String tokenID : importedTokenIDs) {
			int tokenType = importFromGr.getTokenType(tokenID);
			composite.maxTokenType = Math.max(composite.maxTokenType,tokenType);
			if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
				//System.out.println("import token from grammar "+tokenID+"="+tokenType);
				defineToken(tokenID, tokenType);
			}
		}
		return composite.maxTokenType; // return max found
	}

	/** Import the rules/tokens of a delegate grammar.  All delegate grammars are
	 *  read during the ctor of first Grammar created.
	 *
	 *  Do not create NFA here because NFA construction needs to hook up with
	 *  overridden rules in delegation root grammar.
	 */
	public void importGrammar(GrammarAST grammarNameAST, String label) {
		String grammarName = grammarNameAST.getText();
		//System.out.println("import "+gfile.getName());
		String gname = grammarName + GRAMMAR_FILE_EXTENSION;
		BufferedReader br = null;
		try {
			String fullName = tool.getLibraryFile(gname);
			FileReader fr = new FileReader(fullName);
			br = new BufferedReader(fr);
			Grammar delegateGrammar;
			delegateGrammar = new Grammar(tool, gname, composite);
			delegateGrammar.label = label;

			addDelegateGrammar(delegateGrammar);

			delegateGrammar.parseAndBuildAST(br);
			delegateGrammar.addRulesForSyntacticPredicates();
			if ( !validImport(delegateGrammar) ) {
				ErrorManager.grammarError(ErrorManager.MSG_INVALID_IMPORT,
										  this,
										  grammarNameAST.token,
										  this,
										  delegateGrammar);
				return;
			}
			if ( this.type==COMBINED &&
				 (delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[LEXER])||
				  delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[PARSER])) )
			{
				ErrorManager.grammarError(ErrorManager.MSG_IMPORT_NAME_CLASH,
										  this,
										  grammarNameAST.token,
										  this,
										  delegateGrammar);
				return;
			}
			if ( delegateGrammar.grammarTree!=null ) {
				// we have a valid grammar
				// deal with combined grammars
				if ( delegateGrammar.type == LEXER && this.type == COMBINED ) {
					// oops, we wasted some effort; tell lexer to read it in
					// later
					lexerGrammarST.add("imports", grammarName);
					// but, this parser grammar will need the vocab
					// so add to composite anyway so we suck in the tokens later
				}
			}
			//System.out.println("Got grammar:\n"+delegateGrammar);
		}
		catch (IOException ioe) {
			ErrorManager.error(ErrorManager.MSG_CANNOT_OPEN_FILE,
							   gname,
							   ioe);
		}
		finally {
			if ( br!=null ) {
				try {
					br.close();
				}
				catch (IOException ioe) {
					ErrorManager.error(ErrorManager.MSG_CANNOT_CLOSE_FILE,
									   gname,
									   ioe);
				}
			}
		}
	}

	/** add new delegate to composite tree */
	protected void addDelegateGrammar(Grammar delegateGrammar) {
		CompositeGrammarTree t = composite.delegateGrammarTreeRoot.findNode(this);
		t.addChild(new CompositeGrammarTree(delegateGrammar));
		// make sure new grammar shares this composite
		delegateGrammar.composite = this.composite;
	}

	/** Load a vocab file <vocabName>.tokens and return max token type found.
	 */
	public int importTokenVocabulary(GrammarAST tokenVocabOptionAST,
									 String vocabName)
	{
		if ( !getGrammarIsRoot() ) {
			ErrorManager.grammarWarning(ErrorManager.MSG_TOKEN_VOCAB_IN_DELEGATE,
										this,
										tokenVocabOptionAST.token,
										name);
			return composite.maxTokenType;
		}

		File fullFile = tool.getImportedVocabFile(vocabName);
		try {
			FileReader fr = new FileReader(fullFile);
			BufferedReader br = new BufferedReader(fr);
			StreamTokenizer tokenizer = new StreamTokenizer(br);
			tokenizer.parseNumbers();
			tokenizer.wordChars('_', '_');
			tokenizer.eolIsSignificant(true);
			tokenizer.slashSlashComments(true);
			tokenizer.slashStarComments(true);
			tokenizer.ordinaryChar('=');
			tokenizer.quoteChar('\'');
			tokenizer.whitespaceChars(' ',' ');
			tokenizer.whitespaceChars('\t','\t');
			int lineNum = 1;
			int token = tokenizer.nextToken();
			while (token != StreamTokenizer.TT_EOF) {
				String tokenID;
				if ( token == StreamTokenizer.TT_WORD ) {
					tokenID = tokenizer.sval;
				}
				else if ( token == '\'' ) {
					tokenID = "'"+tokenizer.sval+"'";
				}
				else {
					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
									   Utils.integer(lineNum));
					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
					token = tokenizer.nextToken();
					continue;
				}
				token = tokenizer.nextToken();
				if ( token != '=' ) {
					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
									   Utils.integer(lineNum));
					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
					token = tokenizer.nextToken();
					continue;
				}
				token = tokenizer.nextToken(); // skip '='
				if ( token != StreamTokenizer.TT_NUMBER ) {
					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
									   Utils.integer(lineNum));
					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
					token = tokenizer.nextToken();
					continue;
				}
				int tokenType = (int)tokenizer.nval;
				token = tokenizer.nextToken();
				//System.out.println("import "+tokenID+"="+tokenType);
				composite.maxTokenType = Math.max(composite.maxTokenType,tokenType);
				defineToken(tokenID, tokenType);
				lineNum++;
				if ( token != StreamTokenizer.TT_EOL ) {
					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
									   Utils.integer(lineNum));
					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
					token = tokenizer.nextToken();
					continue;
				}
				token = tokenizer.nextToken(); // skip newline
			}
			br.close();
		}
		catch (FileNotFoundException fnfe) {
			ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE,
							   fullFile);
		}
		catch (IOException ioe) {
			ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
							   fullFile,
							   ioe);
		}
		catch (Exception e) {
			ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
							   fullFile,
							   e);
		}
		return composite.maxTokenType;
	}
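	/* The .tokens file format accepted above is one assignment per line:
	 * a token ID or quoted literal, '=', a type number, then newline:
	 *
	 *   ID=4
	 *   INT=5
	 *   'begin'=6
	 */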
	/** Given a token type, get a meaningful name for it such as the ID
	 *  or string literal.  If this is a lexer and the ttype is in the
	 *  char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
	 */
	public String getTokenDisplayName(int ttype) {
		String tokenName;
		int index;
		// inside any target's char range and is lexer grammar?
		if ( this.type==LEXER &&
			 ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
		{
			return getANTLRCharLiteralForChar(ttype);
		}
		// faux label?
		else if ( ttype<0 ) {
			tokenName = composite.typeToTokenList.get(Label.NUM_FAUX_LABELS+ttype);
		}
		else {
			// compute index in typeToTokenList for ttype
			index = ttype-1; // normalize to 0..n-1
			index += Label.NUM_FAUX_LABELS; // jump over faux tokens

			if ( index<composite.typeToTokenList.size() ) {
				tokenName = composite.typeToTokenList.get(index);
				if ( tokenName!=null &&
					 tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) )
				{
					tokenName = composite.typeToStringLiteralList.get(ttype);
				}
			}
			else {
				tokenName = String.valueOf(ttype);
			}
		}
		//System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName);
		return tokenName;
	}

	/** Get the list of ANTLR String literals */
	public Set<String> getStringLiterals() {
		return composite.stringLiteralToTypeMap.keySet();
	}

	public String getGrammarTypeString() {
		return grammarTypeToString[type];
	}

	public int getGrammarMaxLookahead() {
		if ( global_k>=0 ) {
			return global_k;
		}
		Object k = getOption("k");
		if ( k==null ) {
			global_k = 0;
		}
		else if (k instanceof Integer) {
			Integer kI = (Integer)k;
			global_k = kI;
		}
		else {
			// must be String "*"
			if ( k.equals("*") ) { // this the default anyway
				global_k = 0;
			}
		}
		return global_k;
	}

	/** Save the option key/value pair and process it; return the key
	 *  or null if invalid option.
	 */
	public String setOption(String key, Object value, Token optionsStartToken) {
		if ( !legalOption(key) ) {
			ErrorManager.grammarError(ErrorManager.MSG_ILLEGAL_OPTION,
									  this,
									  optionsStartToken,
									  key);
			return null;
		}
		if ( !optionIsValid(key, value) ) {
			return null;
		}
		if ( key.equals("backtrack") && value.toString().equals("true") ) {
			composite.getRootGrammar().atLeastOneBacktrackOption = true;
		}
		if ( options==null ) {
			options = new HashMap<String, Object>();
		}
		options.put(key, value);
		return key;
	}

	public boolean legalOption(String key) {
		switch ( type ) {
			case LEXER :
				return legalLexerOptions.contains(key);
			case PARSER :
				return legalParserOptions.contains(key);
			case TREE_PARSER :
				return legalTreeParserOptions.contains(key);
			default :
				return legalParserOptions.contains(key);
		}
	}

	public void setOptions(Map<String, Object> options, Token optionsStartToken) {
		if ( options==null ) {
			this.options = null;
			return;
		}
		Set<String> keys = options.keySet();
		for (Iterator<String> it = keys.iterator(); it.hasNext();) {
			String optionName = it.next();
			Object optionValue = options.get(optionName);
			String stored=setOption(optionName, optionValue, optionsStartToken);
			if ( stored==null ) {
				it.remove();
			}
		}
	}
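	/* E.g., "options { k=2; backtrack=true; output=AST; }" reaches
	 * setOptions() as a three-entry map; illegal keys are reported and
	 * removed from the map, and backtrack=true additionally flips
	 * atLeastOneBacktrackOption on the root grammar.
	 */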
	public Object getOption(String key) {
		return composite.getOption(key);
	}

	public Object getLocallyDefinedOption(String key) {
		Object value = null;
		if ( options!=null ) {
			value = options.get(key);
		}
		if ( value==null ) {
			value = defaultOptions.get(key);
		}
		return value;
	}

	public Object getBlockOption(GrammarAST blockAST, String key) {
		String v = (String)blockAST.getBlockOption(key);
		if ( v!=null ) {
			return v;
		}
		if ( type==Grammar.LEXER ) {
			return defaultLexerBlockOptions.get(key);
		}
		return defaultBlockOptions.get(key);
	}

	public int getUserMaxLookahead(int decision) {
		int user_k = 0;
		GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decision);
		Object k = blockAST.getBlockOption("k");
		if ( k==null ) {
			user_k = nfa.grammar.getGrammarMaxLookahead();
			return user_k;
		}
		if (k instanceof Integer) {
			Integer kI = (Integer)k;
			user_k = kI;
		}
		else {
			// must be String "*"
			if ( k.equals("*") ) {
				user_k = 0;
			}
		}
		return user_k;
	}

	public boolean getAutoBacktrackMode(int decision) {
		NFAState decisionNFAStartState = getDecisionNFAStartState(decision);
		String autoBacktrack =
			(String)getBlockOption(decisionNFAStartState.associatedASTNode, "backtrack");

		if ( autoBacktrack==null ) {
			autoBacktrack = (String)nfa.grammar.getOption("backtrack");
		}
		return autoBacktrack!=null && autoBacktrack.equals("true");
	}

	public boolean optionIsValid(String key, Object value) {
		return true;
	}

	public boolean buildAST() {
		String outputType = (String)getOption("output");
		if ( outputType!=null ) {
			return outputType.toString().equals("AST");
		}
		return false;
	}

	public boolean rewriteMode() {
		Object outputType = getOption("rewrite");
		if ( outputType!=null ) {
			return outputType.toString().equals("true");
		}
		return false;
	}

	public boolean isBuiltFromString() {
		return builtFromString;
	}

	public boolean buildTemplate() {
		String outputType = (String)getOption("output");
		if ( outputType!=null ) {
			return outputType.toString().equals("template");
		}
		return false;
	}

	public Collection<Rule> getRules() {
		return nameToRuleMap.values();
	}

	/** Get the set of Rules that need to have manual delegations
	 *  like "void rule() { importedGrammar.rule(); }"
	 *
	 *  If this grammar is master, get list of all rule definitions from all
	 *  delegate grammars.  Only master has complete interface from combined
	 *  grammars...we will generate delegates as helper objects.
	 *
	 *  Composite grammars that are not the root/master do not have complete
	 *  interfaces.  It is not my intention that people use subcomposites.
	 *  Only the outermost grammar should be used from outside code.  The
	 *  other grammar components are specifically generated to work only
	 *  with the master/root.
	 *
	 *  delegatedRules = imported - overridden
	 */
	public Set<? extends Rule> getDelegatedRules() {
		return composite.getDelegatedRules(this);
	}

	/** Get set of all rules imported from all delegate grammars even if
	 *  indirectly delegated.
	 */
	public Set<? extends Rule> getAllImportedRules() {
		return composite.getAllImportedRules(this);
	}

	/** Get list of all delegates from all grammars directly or indirectly
	 *  imported into this grammar.
	 */
	public List<Grammar> getDelegates() {
		return composite.getDelegates(this);
	}

	public boolean getHasDelegates() {
		return !getDelegates().isEmpty();
	}

	public List<String> getDelegateNames() {
		// compute delegates:{Grammar g | return g.name;}
		List<String> names = new ArrayList<String>();
		List<Grammar> delegates = composite.getDelegates(this);
		if ( delegates!=null ) {
			for (Grammar g : delegates) {
				names.add(g.name);
			}
		}
		return names;
	}

	public List<Grammar> getDirectDelegates() {
		return composite.getDirectDelegates(this);
	}

	/** Get delegates below direct delegates */
	public List<Grammar> getIndirectDelegates() {
		return composite.getIndirectDelegates(this);
	}

	/** Get list of all delegators.  This amounts to the grammars on the path
	 *  to the root of the delegation tree.
	 */
	public List<Grammar> getDelegators() {
		return composite.getDelegators(this);
	}

	/** Who's my direct parent grammar? */
	public Grammar getDelegator() {
		return composite.getDelegator(this);
	}

	public Set<Rule> getDelegatedRuleReferences() {
		return delegatedRuleReferences;
	}

	public boolean getGrammarIsRoot() {
		return composite.delegateGrammarTreeRoot.grammar == this;
	}

	public void setRuleAST(String ruleName, GrammarAST t) {
		Rule r = getLocallyDefinedRule(ruleName);
		if ( r!=null ) {
			r.tree = t;
			r.EORNode = t.getLastChild();
		}
	}

	public NFAState getRuleStartState(String ruleName) {
		return getRuleStartState(null, ruleName);
	}

	public NFAState getRuleStartState(String scopeName, String ruleName) {
		Rule r = getRule(scopeName, ruleName);
		if ( r!=null ) {
			//System.out.println("getRuleStartState("+scopeName+", "+ruleName+")="+r.startState);
			return r.startState;
		}
		//System.out.println("getRuleStartState("+scopeName+", "+ruleName+")=null");
		return null;
	}

	public String getRuleModifier(String ruleName) {
		Rule r = getRule(ruleName);
		if ( r!=null ) {
			return r.modifier;
		}
		return null;
	}

	public NFAState getRuleStopState(String ruleName) {
		Rule r = getRule(ruleName);
		if ( r!=null ) {
			return r.stopState;
		}
		return null;
	}

	public int assignDecisionNumber(NFAState state) {
		decisionCount++;
		state.setDecisionNumber(decisionCount);
		return decisionCount;
	}

	protected Decision getDecision(int decision) {
		int index = decision-1;
		if ( index >= indexToDecision.size() ) {
			return null;
		}
		Decision d = indexToDecision.get(index);
		return d;
	}

	public List<Decision> getDecisions() {
		return indexToDecision;
	}

	protected Decision createDecision(int decision) {
		int index = decision-1;
		if ( index < indexToDecision.size() ) {
			return getDecision(decision); // don't recreate
		}
		Decision d = new Decision();
		d.decision = decision;
		d.grammar = this;
		indexToDecision.setSize(getNumberOfDecisions());
		indexToDecision.set(index, d);
		return d;
	}

	public List<NFAState> getDecisionNFAStartStateList() {
		List<NFAState> states = new ArrayList<NFAState>(100);
		for (int d = 0; d < indexToDecision.size(); d++) {
			Decision dec = indexToDecision.get(d);
			states.add(dec.startState);
		}
		return states;
	}

	public NFAState getDecisionNFAStartState(int decision) {
		Decision d = getDecision(decision);
		if ( d==null ) {
			return null;
		}
		return d.startState;
	}

	public DFA getLookaheadDFA(int decision) {
		Decision d = getDecision(decision);
		if ( d==null ) {
			return null;
		}
		return d.dfa;
	}

	public GrammarAST getDecisionBlockAST(int decision) {
		Decision d = getDecision(decision);
		if ( d==null ) {
			return null;
		}
		return d.blockAST;
	}
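	/* Decision bookkeeping is 1-based: assignDecisionNumber() hands out
	 * 1..n while Decision records live at index decision-1 in
	 * indexToDecision, so getDecision(1) reads slot 0.
	 */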
	/** Returns a list of column numbers for all decisions
	 *  on a particular line so ANTLRWorks can choose the decision
	 *  depending on the location of the cursor (otherwise,
	 *  ANTLRWorks has to give the *exact* location, which
	 *  is not easy from the user's point of view).
	 *
	 *  This is not particularly fast as it walks the entire
	 *  line:col → DFA map looking for a prefix of "line:".
	 */
	public List<Integer> getLookaheadDFAColumnsForLineInFile(int line) {
		String prefix = line+":";
		List<Integer> columns = new ArrayList<Integer>();
		for (String key : lineColumnToLookaheadDFAMap.keySet()) {
			if ( key.startsWith(prefix) ) {
				columns.add(Integer.valueOf(key.substring(prefix.length())));
			}
		}
		return columns;
	}

	/** Useful for ANTLRWorks to map position in file to the DFA for display */
	public DFA getLookaheadDFAFromPositionInFile(int line, int col) {
		return lineColumnToLookaheadDFAMap.get(
			new StringBuffer().append(line).append(":").append(col).toString());
	}

	public Map<String, DFA> getLineColumnToLookaheadDFAMap() {
		return lineColumnToLookaheadDFAMap;
	}

	/*
	public void setDecisionOptions(int decision, Map options) {
		Decision d = createDecision(decision);
		d.options = options;
	}

	public void setDecisionOption(int decision, String name, Object value) {
		Decision d = getDecision(decision);
		if ( d!=null ) {
			if ( d.options==null ) {
				d.options = new HashMap();
			}
			d.options.put(name,value);
		}
	}

	public Map getDecisionOptions(int decision) {
		Decision d = getDecision(decision);
		if ( d==null ) {
			return null;
		}
		return d.options;
	}
	*/

	public int getNumberOfDecisions() {
		return decisionCount;
	}

	public int getNumberOfCyclicDecisions() {
		int n = 0;
		for (int i=1; i<=getNumberOfDecisions(); i++) {
			Decision d = getDecision(i);
			if ( d.dfa!=null && d.dfa.isCyclic() ) {
				n++;
			}
		}
		return n;
	}

	/** Set the lookahead DFA for a particular decision.  This means
	 *  that the appropriate AST node must be updated to have the new lookahead
	 *  DFA.  This method could be used to properly set the DFAs without
	 *  using the createLookaheadDFAs() method.  You could do this:
	 *
	 *    Grammar g = new Grammar("...");
	 *    g.setLookahead(1, dfa1);
	 *    g.setLookahead(2, dfa2);
	 *    ...
	 */
	public void setLookaheadDFA(int decision, DFA lookaheadDFA) {
		Decision d = createDecision(decision);
		d.dfa = lookaheadDFA;
		GrammarAST ast = d.startState.associatedASTNode;
		ast.setLookaheadDFA(lookaheadDFA);
	}

	public void setDecisionNFA(int decision, NFAState state) {
		Decision d = createDecision(decision);
		d.startState = state;
	}

	public void setDecisionBlockAST(int decision, GrammarAST blockAST) {
		//System.out.println("setDecisionBlockAST("+decision+", "+blockAST.token);
		Decision d = createDecision(decision);
		d.blockAST = blockAST;
	}

	public boolean allDecisionDFAHaveBeenCreated() {
		return allDecisionDFACreated;
	}

	/** How many token types have been allocated so far? */
	public int getMaxTokenType() {
		return composite.maxTokenType;
	}

	/** What is the max char value possible for this grammar's target?  Use
	 *  unicode max if no target defined.
	 */
	public int getMaxCharValue() {
		if ( generator!=null ) {
			return generator.target.getMaxCharValue(generator);
		}
		else {
			return Label.MAX_CHAR_VALUE;
		}
	}

	/** Return a set of all possible token or char types for this grammar */
	public IntSet getTokenTypes() {
		if ( type==LEXER ) {
			return getAllCharValues();
		}
		return IntervalSet.of(Label.MIN_TOKEN_TYPE, getMaxTokenType());
	}

	/** If there is a char vocabulary, use it; else return min to max char
	 *  as defined by the target.  If no target, use max unicode char value.
	 */
	public IntSet getAllCharValues() {
		if ( charVocabulary!=null ) {
			return charVocabulary;
		}
		IntSet allChar = IntervalSet.of(Label.MIN_CHAR_VALUE, getMaxCharValue());
		return allChar;
	}

	/** Return a string representing the escaped char for code c.  E.g., If c
	 *  has value 0x100, you will get "\u0100".  ASCII gets the usual
	 *  char (non-hex) representation.  Control characters are spit out
	 *  as unicode.  While this is specially set up for returning Java strings,
	 *  it can be used by any language target that has the same syntax. :)
	 *
	 *  11/26/2005: I changed this to use double quotes, consistent with antlr.g
	 *  12/09/2005: I changed so everything is single quotes
	 */
	public static String getANTLRCharLiteralForChar(int c) {
		if ( c<Label.MIN_CHAR_VALUE ) {
			ErrorManager.internalError("invalid char value "+c);
			return "'<INVALID>'";
		}
		if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
			return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
		}
		if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
			 !Character.isISOControl((char)c) ) {
			if ( c=='\\' ) {
				return "'\\\\'";
			}
			if ( c=='\'') {
				return "'\\''";
			}
			return '\''+Character.toString((char)c)+'\'';
		}
		// turn on the bit above max "\uFFFF" value so that we pad with zeros
		// then only take last 4 digits
		String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
		String unicodeStr = "'\\u"+hex+"'";
		return unicodeStr;
	}
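	/* Examples for the branches above:
	 *
	 *   getANTLRCharLiteralForChar('a')   => "'a'"       (plain ASCII)
	 *   getANTLRCharLiteralForChar('\n')  => "'\\n'"     (escape table)
	 *   getANTLRCharLiteralForChar(0x100) => "'\\u0100'" (zero-padded hex)
	 */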
	/** For lexer grammars, return everything in unicode not in set.
	 *  For parser and tree grammars, return everything in token space
	 *  from MIN_TOKEN_TYPE to last valid token type or char value.
	 */
	public IntSet complement(IntSet set) {
		//System.out.println("complement "+set.toString(this));
		//System.out.println("vocabulary "+getTokenTypes().toString(this));
		IntSet c = set.complement(getTokenTypes());
		//System.out.println("result="+c.toString(this));
		return c;
	}

	public IntSet complement(int atom) {
		return complement(IntervalSet.of(atom));
	}

	/** Given set tree like ( SET A B ), check that A and B
	 *  are both valid sets themselves, else we must treat it like a BLOCK
	 */
	public boolean isValidSet(TreeToNFAConverter nfabuilder, GrammarAST t) {
		boolean valid;
		try {
			//System.out.println("parse BLOCK as set tree: "+t.toStringTree());
			int alts = nfabuilder.testBlockAsSet(t);
			valid = alts > 1;
		}
		catch (RecognitionException re) {
			// The rule did not parse as a set, return null; ignore exception
			valid = false;
		}
		//System.out.println("valid? "+valid);
		return valid;
	}

	/** Get the set equivalent (if any) of the indicated rule from this
	 *  grammar.  Mostly used in the lexer to do ~T for some fragment rule
	 *  T.  If the rule AST has a SET use that.  If the rule is a single char
	 *  convert it to a set and return.  If rule is not a simple set (w/o actions)
	 *  then return null.
	 *  Rules have AST form:
	 *
	 *      ^( RULE ID modifier ARG RET SCOPE block EOR )
	 */
	public IntSet getSetFromRule(TreeToNFAConverter nfabuilder, String ruleName)
		throws RecognitionException
	{
		Rule r = getRule(ruleName);
		if ( r==null ) {
			return null;
		}
		IntSet elements;
		//System.out.println("parsed tree: "+r.tree.toStringTree());
		elements = nfabuilder.setRule(r.tree);
		//System.out.println("elements="+elements);
		return elements;
	}

	/** Decisions are linked together with transition(1).  Count how
	 *  many there are.  This is here rather than in NFAState because
	 *  a grammar decides how NFAs are put together to form a decision.
	 */
	public int getNumberOfAltsForDecisionNFA(NFAState decisionState) {
		if ( decisionState==null ) {
			return 0;
		}
		int n = 1;
		NFAState p = decisionState;
		while ( p.transition[1]!=null ) {
			n++;
			p = (NFAState)p.transition[1].target;
		}
		return n;
	}

	/** Get the ith alternative (1..n) from a decision; return null when
	 *  an invalid alt is requested.  I must count in to find the right
	 *  alternative number.  For (A|B), you get NFA structure (roughly):
	 *
	 *  o->o-A->o
	 *  |
	 *  o->o-B->o
	 *
	 *  This routine returns the leftmost state for each alt.  So alt=1 returns
	 *  the upper-left-most state in this structure.
	 */
	public NFAState getNFAStateForAltOfDecision(NFAState decisionState, int alt) {
		if ( decisionState==null || alt<=0 ) {
			return null;
		}
		int n = 1;
		NFAState p = decisionState;
		while ( p!=null ) {
			if ( n==alt ) {
				return p;
			}
			n++;
			Transition next = p.transition[1];
			p = null;
			if ( next!=null ) {
				p = (NFAState)next.target;
			}
		}
		return null;
	}
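	/* In the (A|B) picture above, transition[1] chains the two left-edge
	 * states, so getNumberOfAltsForDecisionNFA() answers 2 and
	 * getNFAStateForAltOfDecision(decisionState, 2) is the lower-left state.
	 */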
	/*
	public void computeRuleFOLLOWSets() {
		if ( getNumberOfDecisions()==0 ) {
			createNFAs();
		}
		for (Iterator it = getRules().iterator(); it.hasNext();) {
			Rule r = (Rule)it.next();
			if ( r.isSynPred ) {
				continue;
			}
			LookaheadSet s = ll1Analyzer.FOLLOW(r);
			System.out.println("FOLLOW("+r.name+")="+s);
		}
	}
	*/

	public LookaheadSet FIRST(NFAState s) {
		return ll1Analyzer.FIRST(s);
	}

	public LookaheadSet LOOK(NFAState s) {
		return ll1Analyzer.LOOK(s);
	}

	public void setCodeGenerator(CodeGenerator generator) {
		this.generator = generator;
	}

	public CodeGenerator getCodeGenerator() {
		return generator;
	}

	public GrammarAST getGrammarTree() {
		return grammarTree;
	}

	public void setGrammarTree(GrammarAST value) {
		grammarTree = value;
	}

	public Tool getTool() {
		return tool;
	}

	public void setTool(Tool tool) {
		this.tool = tool;
	}

	/** Given a token type and the text of the literal, come up with a
	 *  decent token type label.  For now it's just T<type>.  Actually,
	 *  if there is an aliased name from tokens like PLUS='+', use it.
	 */
	public String computeTokenNameFromLiteral(int tokenType, String literal) {
		return AUTO_GENERATED_TOKEN_NAME_PREFIX+tokenType;
	}

	@Override
	public String toString() {
	//	return "FFFFFFFFFFFFFF";
		return grammarTreeToString(grammarTree);
	}

	public String grammarTreeToString(GrammarAST t) {
		return grammarTreeToString(t, true);
	}

	public String grammarTreeToString(GrammarAST t, boolean showActions) {
		String s;
		try {
			s = t.getLine()+":"+(t.getCharPositionInLine()+1)+": ";
			s += new ANTLRTreePrinter(new CommonTreeNodeStream(t)).toString(this, showActions);
		}
		catch (Exception e) {
			s = "<invalid or missing tree structure>";
		}
		return s;
	}

	public void printGrammar(PrintStream output) {
		ANTLRTreePrinter printer = new ANTLRTreePrinter(new CommonTreeNodeStream(getGrammarTree()));
		try {
			String g = printer.toString(this, false);
			output.println(g);
		}
		catch (RecognitionException re) {
			ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,re);
		}
	}

}