1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.tool; 29 30 import org.antlr.Tool; 31 import org.antlr.analysis.*; 32 import org.antlr.analysis.DFA; 33 import org.antlr.codegen.CodeGenerator; 34 import org.antlr.codegen.*; 35 import org.antlr.grammar.v3.*; 36 import org.antlr.misc.*; 37 import org.antlr.misc.Utils; 38 import org.antlr.runtime.*; 39 import org.antlr.runtime.tree.CommonTreeNodeStream; 40 import org.stringtemplate.v4.ST; 41 import org.stringtemplate.v4.STGroup; 42 import org.stringtemplate.v4.STGroupString; 43 44 import java.io.*; 45 import java.util.*; 46 47 /** Represents a grammar in memory. */ 48 public class Grammar { 49 public static final String SYNPRED_RULE_PREFIX = "synpred"; 50 51 public static final String GRAMMAR_FILE_EXTENSION = ".g"; 52 53 /** used for generating lexer temp files */ 54 public static final String LEXER_GRAMMAR_FILE_EXTENSION = ".g"; 55 56 public static final int INITIAL_DECISION_LIST_SIZE = 300; 57 public static final int INVALID_RULE_INDEX = -1; 58 59 // the various kinds of labels. t=type, id=ID, types+=type ids+=ID 60 public static final int RULE_LABEL = 1; 61 public static final int TOKEN_LABEL = 2; 62 public static final int RULE_LIST_LABEL = 3; 63 public static final int TOKEN_LIST_LABEL = 4; 64 public static final int CHAR_LABEL = 5; // used in lexer for x='a' 65 public static final int WILDCARD_TREE_LABEL = 6; // Used in tree grammar x=. 66 public static final int WILDCARD_TREE_LIST_LABEL = 7; // Used in tree grammar x+=. 67 68 69 public static String[] LabelTypeToString = 70 {"<invalid>", "rule", "token", "rule-list", "token-list", "wildcard-tree", "wildcard-tree-list"}; 71 72 public static final String ARTIFICIAL_TOKENS_RULENAME = "Tokens"; 73 public static final String FRAGMENT_RULE_MODIFIER = "fragment"; 74 75 public static final String SYNPREDGATE_ACTION_NAME = "synpredgate"; 76 77 /** When converting ANTLR char and string literals, here is the 78 * value set of escape chars. 79 */ 80 public static int ANTLRLiteralEscapedCharValue[] = new int[255]; 81 82 /** Given a char, we need to be able to show as an ANTLR literal. 83 */ 84 public static String ANTLRLiteralCharValueEscape[] = new String[255]; 85 86 static { 87 ANTLRLiteralEscapedCharValue['n'] = '\n'; 88 ANTLRLiteralEscapedCharValue['r'] = '\r'; 89 ANTLRLiteralEscapedCharValue['t'] = '\t'; 90 ANTLRLiteralEscapedCharValue['b'] = '\b'; 91 ANTLRLiteralEscapedCharValue['f'] = '\f'; 92 ANTLRLiteralEscapedCharValue['\\'] = '\\'; 93 ANTLRLiteralEscapedCharValue['\''] = '\''; 94 ANTLRLiteralEscapedCharValue['"'] = '"'; 95 ANTLRLiteralCharValueEscape['\n'] = "\\n"; 96 ANTLRLiteralCharValueEscape['\r'] = "\\r"; 97 ANTLRLiteralCharValueEscape['\t'] = "\\t"; 98 ANTLRLiteralCharValueEscape['\b'] = "\\b"; 99 ANTLRLiteralCharValueEscape['\f'] = "\\f"; 100 ANTLRLiteralCharValueEscape['\\'] = "\\\\"; 101 ANTLRLiteralCharValueEscape['\''] = "\\'"; 102 } 103 104 public static final int LEXER = 1; 105 public static final int PARSER = 2; 106 public static final int TREE_PARSER = 3; 107 public static final int COMBINED = 4; 108 public static final String[] grammarTypeToString = new String[] { 109 "<invalid>", 110 "lexer", 111 "parser", 112 "tree", 113 "combined" 114 }; 115 116 public static final String[] grammarTypeToFileNameSuffix = new String[] { 117 "<invalid>", 118 "Lexer", 119 "Parser", 120 "", // no suffix for tree grammars 121 "Parser" // if combined grammar, gen Parser and Lexer will be done later 122 }; 123 124 /** Set of valid imports. E.g., can only import a tree parser into 125 * another tree parser. Maps delegate to set of delegator grammar types. 126 * validDelegations.get(LEXER) gives list of the kinds of delegators 127 * that can import lexers. 128 */ 129 public static MultiMap<Integer,Integer> validDelegations = 130 new MultiMap<Integer,Integer>() { 131 { 132 map(LEXER, LEXER); 133 map(LEXER, PARSER); 134 map(LEXER, COMBINED); 135 136 map(PARSER, PARSER); 137 map(PARSER, COMBINED); 138 139 map(TREE_PARSER, TREE_PARSER); 140 141 // TODO: allow COMBINED 142 // map(COMBINED, COMBINED); 143 } 144 }; 145 146 /** This is the buffer of *all* tokens found in the grammar file 147 * including whitespace tokens etc... I use this to extract 148 * lexer rules from combined grammars. 149 */ 150 public CommonTokenStream tokenBuffer; 151 public static final String IGNORE_STRING_IN_GRAMMAR_FILE_NAME = "__"; 152 public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__"; 153 154 public static class Decision { 155 public Grammar grammar; 156 public int decision; 157 public NFAState startState; 158 public GrammarAST blockAST; 159 public DFA dfa; 160 } 161 162 public class LabelElementPair { 163 public Token label; 164 public GrammarAST elementRef; 165 public String referencedRuleName; 166 /** Has an action referenced the label? Set by ActionAnalysis.g 167 * Currently only set for rule labels. 168 */ 169 public boolean actionReferencesLabel; 170 public int type; // in {RULE_LABEL,TOKEN_LABEL,RULE_LIST_LABEL,TOKEN_LIST_LABEL} LabelElementPair(Token label, GrammarAST elementRef)171 public LabelElementPair(Token label, GrammarAST elementRef) { 172 this.label = label; 173 this.elementRef = elementRef; 174 this.referencedRuleName = elementRef.getText(); 175 } getReferencedRule()176 public Rule getReferencedRule() { 177 return getRule(referencedRuleName); 178 } toString()179 public String toString() { 180 return elementRef.toString(); 181 } 182 } 183 184 /** What name did the user provide for this grammar? */ 185 public String name; 186 187 /** What type of grammar is this: lexer, parser, tree walker */ 188 public int type; 189 190 /** A list of options specified at the grammar level such as language=Java. 191 * The value can be an AST for complicated values such as character sets. 192 * There may be code generator specific options in here. I do no 193 * interpretation of the key/value pairs...they are simply available for 194 * who wants them. 195 */ 196 protected Map options; 197 198 public static final Set legalLexerOptions = 199 new HashSet() { 200 { 201 add("language"); add("tokenVocab"); 202 add("TokenLabelType"); 203 add("superClass"); 204 add("filter"); 205 add("k"); 206 add("backtrack"); 207 add("memoize"); 208 } 209 }; 210 211 public static final Set legalParserOptions = 212 new HashSet() { 213 { 214 add("language"); add("tokenVocab"); 215 add("output"); add("rewrite"); add("ASTLabelType"); 216 add("TokenLabelType"); 217 add("superClass"); 218 add("k"); 219 add("backtrack"); 220 add("memoize"); 221 } 222 }; 223 224 public static final Set legalTreeParserOptions = 225 new HashSet() { 226 { 227 add("language"); add("tokenVocab"); 228 add("output"); add("rewrite"); add("ASTLabelType"); 229 add("TokenLabelType"); 230 add("superClass"); 231 add("k"); 232 add("backtrack"); 233 add("memoize"); 234 add("filter"); 235 } 236 }; 237 238 public static final Set doNotCopyOptionsToLexer = 239 new HashSet() { 240 { 241 add("output"); add("ASTLabelType"); add("superClass"); 242 add("k"); add("backtrack"); add("memoize"); add("rewrite"); 243 } 244 }; 245 246 public static final Map defaultOptions = 247 new HashMap() { 248 { 249 put("language","Java"); 250 } 251 }; 252 253 public static final Set legalBlockOptions = 254 new HashSet() {{add("k"); add("greedy"); add("backtrack"); add("memoize");}}; 255 256 /** What are the default options for a subrule? */ 257 public static final Map defaultBlockOptions = 258 new HashMap() {{put("greedy","true");}}; 259 260 public static final Map defaultLexerBlockOptions = 261 new HashMap() {{put("greedy","true");}}; 262 263 // Token options are here to avoid contaminating Token object in runtime 264 265 /** Legal options for terminal refs like ID<node=MyVarNode> */ 266 public static final Set legalTokenOptions = 267 new HashSet() { 268 { 269 add(defaultTokenOption); 270 add("type"); 271 add("text"); 272 add("assoc"); 273 } 274 }; 275 276 public static final String defaultTokenOption = "node"; 277 278 /** Is there a global fixed lookahead set for this grammar? 279 * If 0, nothing specified. -1 implies we have not looked at 280 * the options table yet to set k. 281 */ 282 protected int global_k = -1; 283 284 /** Map a scope to a map of name:action pairs. 285 * Map<String, Map<String,GrammarAST>> 286 * The code generator will use this to fill holes in the output files. 287 * I track the AST node for the action in case I need the line number 288 * for errors. 289 */ 290 private Map<String, Map<String, Object>> actions = 291 new HashMap<String, Map<String, Object>>(); 292 293 /** The NFA that represents the grammar with edges labelled with tokens 294 * or epsilon. It is more suitable to analysis than an AST representation. 295 */ 296 public NFA nfa; 297 298 protected NFAFactory factory; 299 300 /** If this grammar is part of a larger composite grammar via delegate 301 * statement, then this points at the composite. The composite holds 302 * a global list of rules, token types, decision numbers, etc... 303 */ 304 public CompositeGrammar composite; 305 306 /** A pointer back into grammar tree. Needed so we can add delegates. */ 307 public CompositeGrammarTree compositeTreeNode; 308 309 /** If this is a delegate of another grammar, this is the label used 310 * as an instance var by that grammar to point at this grammar. null 311 * if no label was specified in the delegate statement. 312 */ 313 public String label; 314 315 /** TODO: hook this to the charVocabulary option */ 316 protected IntSet charVocabulary = null; 317 318 /** For ANTLRWorks, we want to be able to map a line:col to a specific 319 * decision DFA so it can display DFA. 320 */ 321 Map lineColumnToLookaheadDFAMap = new HashMap(); 322 323 public Tool tool; 324 325 /** The unique set of all rule references in any rule; set of tree node 326 * objects so two refs to same rule can exist but at different line/position. 327 */ 328 protected Set<GrammarAST> ruleRefs = new HashSet<GrammarAST>(); 329 330 protected Set<GrammarAST> scopedRuleRefs = new HashSet(); 331 332 /** The unique set of all token ID references in any rule */ 333 protected Set<Token> tokenIDRefs = new HashSet<Token>(); 334 335 /** Be able to assign a number to every decision in grammar; 336 * decisions in 1..n 337 */ 338 protected int decisionCount = 0; 339 340 /** A list of all rules that are in any left-recursive cycle. There 341 * could be multiple cycles, but this is a flat list of all problematic 342 * rules. This is stuff we couldn't refactor to precedence rule. 343 */ 344 protected Set<Rule> leftRecursiveRules; 345 346 /** An external tool requests that DFA analysis abort prematurely. Stops 347 * at DFA granularity, which are limited to a DFA size and time computation 348 * as failsafe. 349 */ 350 protected boolean externalAnalysisAbort; 351 352 public int numNonLLStar = 0; // hack to track for -report 353 354 /** When we read in a grammar, we track the list of syntactic predicates 355 * and build faux rules for them later. See my blog entry Dec 2, 2005: 356 * http://www.antlr.org/blog/antlr3/lookahead.tml 357 * This maps the name (we make up) for a pred to the AST grammar fragment. 358 */ 359 protected LinkedHashMap<String, GrammarAST> nameToSynpredASTMap; 360 361 /** Each left-recursive precedence rule must define precedence array 362 * for binary operators like: 363 * 364 * static int[] e_prec = new int[tokenNames.length]; 365 * static { 366 * e_prec[75] = 1; 367 * } 368 * Track and we push into parser later; this is computed 369 * early when we look for prec rules. 370 */ 371 public List<String> precRuleInitCodeBlocks = new ArrayList<String>(); 372 373 /** At least one rule has memoize=true */ 374 public boolean atLeastOneRuleMemoizes; 375 376 /** At least one backtrack=true in rule or decision or grammar. */ 377 public boolean atLeastOneBacktrackOption; 378 379 /** Was this created from a COMBINED grammar? */ 380 public boolean implicitLexer; 381 382 /** Map a rule to it's Rule object */ 383 protected LinkedHashMap<String,Rule> nameToRuleMap = new LinkedHashMap<String,Rule>(); 384 385 /** If this rule is a delegate, some rules might be overridden; don't 386 * want to gen code for them. 387 */ 388 public Set<String> overriddenRules = new HashSet<String>(); 389 390 /** The list of all rules referenced in this grammar, not defined here, 391 * and defined in a delegate grammar. Not all of these will be generated 392 * in the recognizer for this file; only those that are affected by rule 393 * definitions in this grammar. I am not sure the Java target will need 394 * this but I'm leaving in case other targets need it. 395 * see NameSpaceChecker.lookForReferencesToUndefinedSymbols() 396 */ 397 protected Set<Rule> delegatedRuleReferences = new HashSet(); 398 399 /** The ANTLRParser tracks lexer rules when reading combined grammars 400 * so we can build the Tokens rule. 401 */ 402 public List<String> lexerRuleNamesInCombined = new ArrayList<String>(); 403 404 /** Track the scopes defined outside of rules and the scopes associated 405 * with all rules (even if empty). 406 */ 407 protected Map scopes = new HashMap(); 408 409 /** An AST that records entire input grammar with all rules. A simple 410 * grammar with one rule, "grammar t; a : A | B ;", looks like: 411 * ( grammar t ( rule a ( BLOCK ( ALT A ) ( ALT B ) ) <end-of-rule> ) ) 412 */ 413 protected GrammarAST grammarTree = null; 414 415 /** Each subrule/rule is a decision point and we must track them so we 416 * can go back later and build DFA predictors for them. This includes 417 * all the rules, subrules, optional blocks, ()+, ()* etc... 418 */ 419 protected Vector<Decision> indexToDecision = 420 new Vector<Decision>(INITIAL_DECISION_LIST_SIZE); 421 422 /** If non-null, this is the code generator we will use to generate 423 * recognizers in the target language. 424 */ 425 protected CodeGenerator generator; 426 427 public NameSpaceChecker nameSpaceChecker = new NameSpaceChecker(this); 428 429 public LL1Analyzer ll1Analyzer = new LL1Analyzer(this); 430 431 /** For merged lexer/parsers, we must construct a separate lexer spec. 432 * This is the template for lexer; put the literals first then the 433 * regular rules. We don't need to specify a token vocab import as 434 * I make the new grammar import from the old all in memory; don't want 435 * to force it to read from the disk. Lexer grammar will have same 436 * name as original grammar but will be in different filename. Foo.g 437 * with combined grammar will have FooParser.java generated and 438 * Foo__.g with again Foo inside. It will however generate FooLexer.java 439 * as it's a lexer grammar. A bit odd, but autogenerated. Can tweak 440 * later if we want. 441 */ 442 protected String lexerGrammarTemplate = 443 "grammar(name, options, imports, actionNames, actions, literals, rules) ::= <<\n" + 444 "lexer grammar <name>;\n" + 445 "<if(options)>" + 446 "options {\n" + 447 " <options:{it | <it.name>=<it.value>;<\\n>}>\n" + 448 "}<\\n>\n" + 449 "<endif>\n" + 450 "<if(imports)>import <imports; separator=\", \">;<endif>\n" + 451 "<actionNames,actions:{n,a|@<n> {<a>\\}\n}>\n" + 452 "<literals:{it | <it.ruleName> : <it.literal> ;\n}>\n" + 453 "<rules>\n" + 454 ">>\n"; 455 protected ST lexerGrammarST; 456 457 /** What file name holds this grammar? */ 458 protected String fileName; 459 460 /** How long in ms did it take to build DFAs for this grammar? 461 * If this grammar is a combined grammar, it only records time for 462 * the parser grammar component. This only records the time to 463 * do the LL(*) work; NFA->DFA conversion. 464 */ 465 public long DFACreationWallClockTimeInMS; 466 467 public int numberOfSemanticPredicates = 0; 468 public int numberOfManualLookaheadOptions = 0; 469 public Set<Integer> setOfNondeterministicDecisionNumbers = new HashSet<Integer>(); 470 public Set<Integer> setOfNondeterministicDecisionNumbersResolvedWithPredicates = 471 new HashSet<Integer>(); 472 473 /** Track decisions with syn preds specified for reporting. 474 * This is the a set of BLOCK type AST nodes. 475 */ 476 public Set<GrammarAST> blocksWithSynPreds = new HashSet(); 477 478 /** Track decisions that actually use the syn preds in the DFA. 479 * Computed during NFA to DFA conversion. 480 */ 481 public Set<DFA> decisionsWhoseDFAsUsesSynPreds = new HashSet<DFA>(); 482 483 /** Track names of preds so we can avoid generating preds that aren't used 484 * Computed during NFA to DFA conversion. Just walk accept states 485 * and look for synpreds because that is the only state target whose 486 * incident edges can have synpreds. Same is try for 487 * decisionsWhoseDFAsUsesSynPreds. 488 */ 489 public Set<String> synPredNamesUsedInDFA = new HashSet(); 490 491 /** Track decisions with syn preds specified for reporting. 492 * This is the a set of BLOCK type AST nodes. 493 */ 494 public Set<GrammarAST> blocksWithSemPreds = new HashSet(); 495 496 /** Track decisions that actually use the syn preds in the DFA. */ 497 public Set<DFA> decisionsWhoseDFAsUsesSemPreds = new HashSet(); 498 499 protected boolean allDecisionDFACreated = false; 500 501 /** We need a way to detect when a lexer grammar is autogenerated from 502 * another grammar or we are just sending in a string representing a 503 * grammar. We don't want to generate a .tokens file, for example, 504 * in such cases. 505 */ 506 protected boolean builtFromString = false; 507 508 /** Factored out the sanity checking code; delegate to it. */ 509 GrammarSanity sanity = new GrammarSanity(this); 510 511 /** Useful for asking questions about target during analysis */ 512 Target target; 513 514 /** Create a grammar from file name. */ Grammar(Tool tool, String fileName, CompositeGrammar composite)515 public Grammar(Tool tool, String fileName, CompositeGrammar composite) { 516 this.composite = composite; 517 setTool(tool); 518 setFileName(fileName); 519 // ensure we have the composite set to something 520 if ( composite.delegateGrammarTreeRoot==null ) { 521 composite.setDelegationRoot(this); 522 } 523 STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate); 524 lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar"); 525 target = CodeGenerator.loadLanguageTarget((String) getOption("language")); 526 } 527 528 /** Useful for when you are sure that you are not part of a composite 529 * already. Used in Interp/RandomPhrase and testing. 530 */ Grammar()531 public Grammar() { this((Tool)null); } 532 Grammar(Tool tool)533 public Grammar(Tool tool) { 534 setTool(tool); 535 builtFromString = true; 536 composite = new CompositeGrammar(this); 537 STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate); 538 lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar"); 539 target = CodeGenerator.loadLanguageTarget((String)getOption("language")); 540 } 541 542 /** Used for testing; only useful on noncomposite grammars.*/ Grammar(String grammarString)543 public Grammar(String grammarString) 544 throws RecognitionException 545 { 546 this(null, grammarString); 547 } 548 549 /** Used for testing and Interp/RandomPhrase. Only useful on 550 * noncomposite grammars. 551 */ Grammar(Tool tool, String grammarString)552 public Grammar(Tool tool, String grammarString) 553 throws RecognitionException 554 { 555 this(tool); 556 setFileName("<string>"); 557 StringReader r = new StringReader(grammarString); 558 parseAndBuildAST(r); 559 composite.assignTokenTypes(); 560 //composite.translateLeftRecursiveRules(); 561 addRulesForSyntacticPredicates(); 562 composite.defineGrammarSymbols(); 563 //composite.createNFAs(); 564 checkNameSpaceAndActions(); 565 } 566 setFileName(String fileName)567 public void setFileName(String fileName) { 568 this.fileName = fileName; 569 } 570 getFileName()571 public String getFileName() { 572 return fileName; 573 } 574 setName(String name)575 public void setName(String name) { 576 if ( name==null ) { 577 return; 578 } 579 // don't error check autogenerated files (those with '__' in them) 580 String saneFile = fileName.replace('\\', '/'); 581 int lastSlash = saneFile.lastIndexOf('/'); 582 String onlyFileName = saneFile.substring(lastSlash+1, fileName.length()); 583 if ( !builtFromString ) { 584 int lastDot = onlyFileName.lastIndexOf('.'); 585 String onlyFileNameNoSuffix = null; 586 if ( lastDot < 0 ) { 587 ErrorManager.error(ErrorManager.MSG_FILENAME_EXTENSION_ERROR, fileName); 588 onlyFileNameNoSuffix = onlyFileName+GRAMMAR_FILE_EXTENSION; 589 } 590 else { 591 onlyFileNameNoSuffix = onlyFileName.substring(0,lastDot); 592 } 593 if ( !name.equals(onlyFileNameNoSuffix) ) { 594 ErrorManager.error(ErrorManager.MSG_FILE_AND_GRAMMAR_NAME_DIFFER, 595 name, 596 fileName); 597 } 598 } 599 this.name = name; 600 } 601 setGrammarContent(String grammarString)602 public void setGrammarContent(String grammarString) throws RecognitionException { 603 StringReader r = new StringReader(grammarString); 604 parseAndBuildAST(r); 605 composite.assignTokenTypes(); 606 composite.defineGrammarSymbols(); 607 } 608 parseAndBuildAST()609 public void parseAndBuildAST() 610 throws IOException 611 { 612 FileReader fr = null; 613 BufferedReader br = null; 614 try { 615 fr = new FileReader(fileName); 616 br = new BufferedReader(fr); 617 parseAndBuildAST(br); 618 br.close(); 619 br = null; 620 } 621 finally { 622 if ( br!=null ) { 623 br.close(); 624 } 625 } 626 } 627 parseAndBuildAST(Reader r)628 public void parseAndBuildAST(Reader r) { 629 // BUILD AST FROM GRAMMAR 630 ANTLRLexer lexer; 631 try { 632 lexer = new ANTLRLexer(new ANTLRReaderStream(r)); 633 } catch (IOException e) { 634 ErrorManager.internalError("unexpected stream error from parsing "+fileName, e); 635 return; 636 } 637 638 lexer.setFileName(this.getFileName()); 639 tokenBuffer = new CommonTokenStream(lexer); 640 ANTLRParser parser = ANTLRParser.createParser(tokenBuffer); 641 parser.setFileName(this.getFileName()); 642 ANTLRParser.grammar__return result = null; 643 try { 644 result = parser.grammar_(this); 645 } 646 catch (RecognitionException re) { 647 ErrorManager.internalError("unexpected parser recognition error from "+fileName, re); 648 } 649 650 dealWithTreeFilterMode(); // tree grammar and filter=true? 651 652 if ( lexer.hasASTOperator && !buildAST() ) { 653 Object value = getOption("output"); 654 if ( value == null ) { 655 ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_OR_OP_WITH_NO_OUTPUT_OPTION, 656 this, null); 657 setOption("output", "AST", null); 658 } 659 else { 660 ErrorManager.grammarError(ErrorManager.MSG_AST_OP_WITH_NON_AST_OUTPUT_OPTION, 661 this, null, value); 662 } 663 } 664 665 setGrammarTree((GrammarAST)result.getTree()); 666 667 //if ( grammarTree!=null ) System.out.println("grammar tree: "+grammarTree.toStringTree()); 668 669 grammarTree.setUnknownTokenBoundaries(); 670 671 setFileName(lexer.getFileName()); // the lexer #src might change name 672 if ( grammarTree==null || grammarTree.findFirstType(ANTLRParser.RULE)==null ) { 673 ErrorManager.error(ErrorManager.MSG_NO_RULES, getFileName()); 674 return; 675 } 676 } 677 dealWithTreeFilterMode()678 protected void dealWithTreeFilterMode() { 679 Object filterMode = (String)getOption("filter"); 680 if ( type==TREE_PARSER && filterMode!=null && filterMode.toString().equals("true") ) { 681 // check for conflicting options 682 // filter => backtrack=true 683 // filter&&output=AST => rewrite=true 684 // filter&&output!=AST => error 685 // any deviation from valid option set is an error 686 Object backtrack = (String)getOption("backtrack"); 687 Object output = getOption("output"); 688 Object rewrite = getOption("rewrite"); 689 if ( backtrack!=null && !backtrack.toString().equals("true") ) { 690 ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER, 691 "backtrack", backtrack); 692 } 693 if ( output!=null && !output.toString().equals("AST") ) { 694 ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER, 695 "output", output); 696 setOption("output", "", null); 697 } 698 if ( rewrite!=null && !rewrite.toString().equals("true") ) { 699 ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER, 700 "rewrite", rewrite); 701 } 702 // set options properly 703 setOption("backtrack", "true", null); 704 if ( output!=null && output.toString().equals("AST") ) { 705 setOption("rewrite", "true", null); 706 } 707 // @synpredgate set to state.backtracking==1 by code gen when filter=true 708 // superClass set in template target::treeParser 709 } 710 } 711 translateLeftRecursiveRule(GrammarAST ruleAST)712 public void translateLeftRecursiveRule(GrammarAST ruleAST) { 713 //System.out.println(ruleAST.toStringTree()); 714 CommonTreeNodeStream input = new CommonTreeNodeStream(ruleAST); 715 LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker = 716 new LeftRecursiveRuleAnalyzer(input, this, ruleAST.enclosingRuleName); 717 boolean isLeftRec = false; 718 try { 719 //System.out.println("TESTING "+ruleAST.enclosingRuleName); 720 isLeftRec = leftRecursiveRuleWalker.rec_rule(this); 721 } 722 catch (RecognitionException re) { 723 ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re); 724 } 725 if ( !isLeftRec ) return; 726 List<String> rules = new ArrayList<String>(); 727 rules.add( leftRecursiveRuleWalker.getArtificialPrecStartRule() ) ; 728 rules.add( leftRecursiveRuleWalker.getArtificialOpPrecRule() ); 729 rules.add( leftRecursiveRuleWalker.getArtificialPrimaryRule() ); 730 for (String r : rules) { 731 GrammarAST t = parseArtificialRule(r); 732 addRule(grammarTree, t); 733 //System.out.println(t.toStringTree()); 734 } 735 736 //precRuleInitCodeBlocks.add( precRuleWalker.getOpPrecJavaCode() ); 737 } 738 defineGrammarSymbols()739 public void defineGrammarSymbols() { 740 if ( Tool.internalOption_PrintGrammarTree ) { 741 System.out.println(grammarTree.toStringList()); 742 } 743 744 // DEFINE RULES 745 //System.out.println("### define "+name+" rules"); 746 DefineGrammarItemsWalker defineItemsWalker = new DefineGrammarItemsWalker(new CommonTreeNodeStream(getGrammarTree())); 747 try { 748 defineItemsWalker.grammar_(this); 749 } 750 catch (RecognitionException re) { 751 ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, 752 re); 753 } 754 } 755 756 /** ANALYZE ACTIONS, LOOKING FOR LABEL AND ATTR REFS, sanity check */ checkNameSpaceAndActions()757 public void checkNameSpaceAndActions() { 758 examineAllExecutableActions(); 759 checkAllRulesForUselessLabels(); 760 761 nameSpaceChecker.checkConflicts(); 762 } 763 764 /** Many imports are illegal such as lexer into a tree grammar */ validImport(Grammar delegate)765 public boolean validImport(Grammar delegate) { 766 List<Integer> validDelegators = validDelegations.get(delegate.type); 767 return validDelegators!=null && validDelegators.contains(this.type); 768 } 769 770 /** If the grammar is a combined grammar, return the text of the implicit 771 * lexer grammar. 772 */ getLexerGrammar()773 public String getLexerGrammar() { 774 if ( lexerGrammarST.getAttribute("literals")==null && 775 lexerGrammarST.getAttribute("rules")==null ) 776 { 777 // if no rules, return nothing 778 return null; 779 } 780 lexerGrammarST.add("name", name); 781 // if there are any actions set for lexer, pass them in 782 if ( getActions().get("lexer")!=null ) { 783 lexerGrammarST.add("actionNames", 784 getActions().get("lexer").keySet()); 785 lexerGrammarST.add("actions", 786 getActions().get("lexer").values()); 787 } 788 // make sure generated grammar has the same options 789 if ( options!=null ) { 790 Iterator optionNames = options.keySet().iterator(); 791 while (optionNames.hasNext()) { 792 String optionName = (String) optionNames.next(); 793 if ( !doNotCopyOptionsToLexer.contains(optionName) ) { 794 Object value = options.get(optionName); 795 lexerGrammarST.addAggr("options.{name,value}", optionName, value); 796 } 797 } 798 } 799 return lexerGrammarST.render(); 800 } 801 getImplicitlyGeneratedLexerFileName()802 public String getImplicitlyGeneratedLexerFileName() { 803 return name+ 804 IGNORE_STRING_IN_GRAMMAR_FILE_NAME + 805 LEXER_GRAMMAR_FILE_EXTENSION; 806 } 807 808 /** Get the name of the generated recognizer; may or may not be same 809 * as grammar name. 810 * Recognizer is TParser and TLexer from T if combined, else 811 * just use T regardless of grammar type. 812 */ getRecognizerName()813 public String getRecognizerName() { 814 String suffix = ""; 815 List<Grammar> grammarsFromRootToMe = composite.getDelegators(this); 816 //System.out.println("grammarsFromRootToMe="+grammarsFromRootToMe); 817 String qualifiedName = name; 818 if ( grammarsFromRootToMe!=null ) { 819 StringBuffer buf = new StringBuffer(); 820 for (Grammar g : grammarsFromRootToMe) { 821 buf.append(g.name); 822 buf.append('_'); 823 } 824 buf.append(name); 825 qualifiedName = buf.toString(); 826 } 827 if ( type==Grammar.COMBINED || 828 (type==Grammar.LEXER && implicitLexer) ) 829 { 830 suffix = Grammar.grammarTypeToFileNameSuffix[type]; 831 } 832 return qualifiedName+suffix; 833 } 834 835 /** Parse a rule we add artificially that is a list of the other lexer 836 * rules like this: "Tokens : ID | INT | SEMI ;" nextToken() will invoke 837 * this to set the current token. Add char literals before 838 * the rule references. 839 * 840 * If in filter mode, we want every alt to backtrack and we need to 841 * do k=1 to force the "first token def wins" rule. Otherwise, the 842 * longest-match rule comes into play with LL(*). 843 * 844 * The ANTLRParser antlr.g file now invokes this when parsing a lexer 845 * grammar, which I think is proper even though it peeks at the info 846 * that later phases will (re)compute. It gets a list of lexer rules 847 * and builds a string representing the rule; then it creates a parser 848 * and adds the resulting tree to the grammar's tree. 849 */ addArtificialMatchTokensRule(GrammarAST grammarAST, List<String> ruleNames, List<String> delegateNames, boolean filterMode)850 public GrammarAST addArtificialMatchTokensRule(GrammarAST grammarAST, 851 List<String> ruleNames, 852 List<String> delegateNames, 853 boolean filterMode) { 854 ST matchTokenRuleST = null; 855 if ( filterMode ) { 856 matchTokenRuleST = new ST( 857 ARTIFICIAL_TOKENS_RULENAME+ 858 " options {k=1; backtrack=true;} : <rules; separator=\"|\">;"); 859 } 860 else { 861 matchTokenRuleST = new ST( 862 ARTIFICIAL_TOKENS_RULENAME+" : <rules; separator=\"|\">;"); 863 } 864 865 // Now add token rule references 866 for (int i = 0; i < ruleNames.size(); i++) { 867 String rname = (String) ruleNames.get(i); 868 matchTokenRuleST.add("rules", rname); 869 } 870 for (int i = 0; i < delegateNames.size(); i++) { 871 String dname = (String) delegateNames.get(i); 872 matchTokenRuleST.add("rules", dname+".Tokens"); 873 } 874 //System.out.println("tokens rule: "+matchTokenRuleST.toString()); 875 GrammarAST r = parseArtificialRule(matchTokenRuleST.render()); 876 addRule(grammarAST, r); 877 //addRule((GrammarAST)parser.getAST()); 878 //return (GrammarAST)parser.getAST(); 879 return r; 880 } 881 parseArtificialRule(String ruleText)882 public GrammarAST parseArtificialRule(String ruleText) { 883 ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText)); 884 ANTLRParser parser = ANTLRParser.createParser(new CommonTokenStream(lexer)); 885 parser.setGrammar(this); 886 parser.setGrammarType(this.type); 887 try { 888 ANTLRParser.rule_return result = parser.rule(); 889 return (GrammarAST)result.getTree(); 890 } 891 catch (Exception e) { 892 ErrorManager.error(ErrorManager.MSG_ERROR_CREATING_ARTIFICIAL_RULE, 893 e); 894 return null; 895 } 896 } 897 addRule(GrammarAST grammarTree, GrammarAST t)898 public void addRule(GrammarAST grammarTree, GrammarAST t) { 899 GrammarAST p = null; 900 for (int i = 0; i < grammarTree.getChildCount(); i++ ) { 901 p = (GrammarAST)grammarTree.getChild(i); 902 if (p == null || p.getType() == ANTLRParser.RULE || p.getType() == ANTLRParser.PREC_RULE) { 903 break; 904 } 905 } 906 907 if (p != null) { 908 grammarTree.addChild(t); 909 } 910 } 911 912 /** for any syntactic predicates, we need to define rules for them; they will get 913 * defined automatically like any other rule. :) 914 */ getArtificialRulesForSyntacticPredicates(LinkedHashMap<String,GrammarAST> nameToSynpredASTMap)915 protected List getArtificialRulesForSyntacticPredicates(LinkedHashMap<String,GrammarAST> nameToSynpredASTMap) 916 { 917 List<GrammarAST> rules = new ArrayList<GrammarAST>(); 918 if ( nameToSynpredASTMap==null ) { 919 return rules; 920 } 921 boolean isLexer = grammarTree.getType()==ANTLRParser.LEXER_GRAMMAR; 922 for (String synpredName : nameToSynpredASTMap.keySet()) { 923 GrammarAST fragmentAST = nameToSynpredASTMap.get(synpredName); 924 GrammarAST ruleAST = 925 ANTLRParser.createSimpleRuleAST(synpredName, 926 fragmentAST, 927 isLexer); 928 rules.add(ruleAST); 929 } 930 return rules; 931 } 932 addRulesForSyntacticPredicates()933 public void addRulesForSyntacticPredicates() { 934 // Get syn pred rules and add to existing tree 935 List synpredRules = 936 getArtificialRulesForSyntacticPredicates(nameToSynpredASTMap); 937 for (int i = 0; i < synpredRules.size(); i++) { 938 GrammarAST rAST = (GrammarAST) synpredRules.get(i); 939 grammarTree.addChild(rAST); 940 } 941 } 942 943 /** Walk the list of options, altering this Grammar object according 944 * to any I recognize. 945 protected void processOptions() { 946 Iterator optionNames = options.keySet().iterator(); 947 while (optionNames.hasNext()) { 948 String optionName = (String) optionNames.next(); 949 Object value = options.get(optionName); 950 if ( optionName.equals("tokenVocab") ) { 951 952 } 953 } 954 } 955 */ 956 957 /** Define all the rule begin/end NFAStates to solve forward reference 958 * issues. Critical for composite grammars too. 959 * This is normally called on all root/delegates manually and then 960 * buildNFA() is called afterwards because the NFA construction needs 961 * to see rule start/stop states from potentially every grammar. Has 962 * to be have these created a priori. Testing routines will often 963 * just call buildNFA(), which forces a call to this method if not 964 * done already. Works ONLY for single noncomposite grammars. 965 */ createRuleStartAndStopNFAStates()966 public void createRuleStartAndStopNFAStates() { 967 //System.out.println("### createRuleStartAndStopNFAStates "+getGrammarTypeString()+" grammar "+name+" NFAs"); 968 if ( nfa!=null ) { 969 return; 970 } 971 nfa = new NFA(this); 972 factory = new NFAFactory(nfa); 973 974 Collection rules = getRules(); 975 for (Iterator itr = rules.iterator(); itr.hasNext();) { 976 Rule r = (Rule) itr.next(); 977 String ruleName = r.name; 978 NFAState ruleBeginState = factory.newState(); 979 ruleBeginState.setDescription("rule "+ruleName+" start"); 980 ruleBeginState.enclosingRule = r; 981 r.startState = ruleBeginState; 982 NFAState ruleEndState = factory.newState(); 983 ruleEndState.setDescription("rule "+ruleName+" end"); 984 ruleEndState.setAcceptState(true); 985 ruleEndState.enclosingRule = r; 986 r.stopState = ruleEndState; 987 } 988 } 989 buildNFA()990 public void buildNFA() { 991 if ( nfa==null ) { 992 createRuleStartAndStopNFAStates(); 993 } 994 if ( nfa.complete ) { 995 // don't let it create more than once; has side-effects 996 return; 997 } 998 //System.out.println("### build "+getGrammarTypeString()+" grammar "+name+" NFAs"); 999 if ( getRules().size()==0 ) { 1000 return; 1001 } 1002 1003 CommonTreeNodeStream input = new CommonTreeNodeStream(getGrammarTree()); 1004 TreeToNFAConverter nfaBuilder = new TreeToNFAConverter(input, this, nfa, factory); 1005 try { 1006 nfaBuilder.grammar_(); 1007 } 1008 catch (RecognitionException re) { 1009 ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, 1010 name, 1011 re); 1012 } 1013 nfa.complete = true; 1014 } 1015 1016 /** For each decision in this grammar, compute a single DFA using the 1017 * NFA states associated with the decision. The DFA construction 1018 * determines whether or not the alternatives in the decision are 1019 * separable using a regular lookahead language. 1020 * 1021 * Store the lookahead DFAs in the AST created from the user's grammar 1022 * so the code generator or whoever can easily access it. 1023 * 1024 * This is a separate method because you might want to create a 1025 * Grammar without doing the expensive analysis. 1026 */ createLookaheadDFAs()1027 public void createLookaheadDFAs() { 1028 createLookaheadDFAs(true); 1029 } 1030 createLookaheadDFAs(boolean wackTempStructures)1031 public void createLookaheadDFAs(boolean wackTempStructures) { 1032 if ( nfa==null ) { 1033 buildNFA(); 1034 } 1035 1036 // CHECK FOR LEFT RECURSION; Make sure we can actually do analysis 1037 checkAllRulesForLeftRecursion(); 1038 1039 /* 1040 // was there a severe problem while sniffing the grammar? 1041 if ( ErrorManager.doNotAttemptAnalysis() ) { 1042 return; 1043 } 1044 */ 1045 1046 long start = System.currentTimeMillis(); 1047 1048 //System.out.println("### create DFAs"); 1049 int numDecisions = getNumberOfDecisions(); 1050 if ( NFAToDFAConverter.SINGLE_THREADED_NFA_CONVERSION ) { 1051 for (int decision=1; decision<=numDecisions; decision++) { 1052 NFAState decisionStartState = getDecisionNFAStartState(decision); 1053 if ( leftRecursiveRules.contains(decisionStartState.enclosingRule) ) { 1054 // don't bother to process decisions within left recursive rules. 1055 if ( composite.watchNFAConversion ) { 1056 System.out.println("ignoring decision "+decision+ 1057 " within left-recursive rule "+decisionStartState.enclosingRule.name); 1058 } 1059 continue; 1060 } 1061 if ( !externalAnalysisAbort && decisionStartState.getNumberOfTransitions()>1 ) { 1062 Rule r = decisionStartState.enclosingRule; 1063 if ( r.isSynPred && !synPredNamesUsedInDFA.contains(r.name) ) { 1064 continue; 1065 } 1066 DFA dfa = null; 1067 // if k=* or k=1, try LL(1) 1068 if ( getUserMaxLookahead(decision)==0 || 1069 getUserMaxLookahead(decision)==1 ) 1070 { 1071 dfa = createLL_1_LookaheadDFA(decision); 1072 } 1073 if ( dfa==null ) { 1074 if ( composite.watchNFAConversion ) { 1075 System.out.println("decision "+decision+ 1076 " not suitable for LL(1)-optimized DFA analysis"); 1077 } 1078 dfa = createLookaheadDFA(decision, wackTempStructures); 1079 } 1080 if ( dfa.startState==null ) { 1081 // something went wrong; wipe out DFA 1082 setLookaheadDFA(decision, null); 1083 } 1084 if ( Tool.internalOption_PrintDFA ) { 1085 System.out.println("DFA d="+decision); 1086 FASerializer serializer = new FASerializer(nfa.grammar); 1087 String result = serializer.serialize(dfa.startState); 1088 System.out.println(result); 1089 } 1090 } 1091 } 1092 } 1093 else { 1094 ErrorManager.info("two-threaded DFA conversion"); 1095 // create a barrier expecting n DFA and this main creation thread 1096 Barrier barrier = new Barrier(3); 1097 // assume 2 CPU for now 1098 int midpoint = numDecisions/2; 1099 NFAConversionThread t1 = 1100 new NFAConversionThread(this, barrier, 1, midpoint); 1101 new Thread(t1).start(); 1102 if ( midpoint == (numDecisions/2) ) { 1103 midpoint++; 1104 } 1105 NFAConversionThread t2 = 1106 new NFAConversionThread(this, barrier, midpoint, numDecisions); 1107 new Thread(t2).start(); 1108 // wait for these two threads to finish 1109 try { 1110 barrier.waitForRelease(); 1111 } 1112 catch(InterruptedException e) { 1113 ErrorManager.internalError("what the hell? DFA interruptus", e); 1114 } 1115 } 1116 1117 long stop = System.currentTimeMillis(); 1118 DFACreationWallClockTimeInMS = stop - start; 1119 1120 // indicate that we've finished building DFA (even if #decisions==0) 1121 allDecisionDFACreated = true; 1122 } 1123 createLL_1_LookaheadDFA(int decision)1124 public DFA createLL_1_LookaheadDFA(int decision) { 1125 Decision d = getDecision(decision); 1126 String enclosingRule = d.startState.enclosingRule.name; 1127 Rule r = d.startState.enclosingRule; 1128 NFAState decisionStartState = getDecisionNFAStartState(decision); 1129 1130 if ( composite.watchNFAConversion ) { 1131 System.out.println("--------------------\nattempting LL(1) DFA (d=" 1132 +decisionStartState.getDecisionNumber()+") for "+ 1133 decisionStartState.getDescription()); 1134 } 1135 1136 if ( r.isSynPred && !synPredNamesUsedInDFA.contains(enclosingRule) ) { 1137 return null; 1138 } 1139 1140 // compute lookahead for each alt 1141 int numAlts = getNumberOfAltsForDecisionNFA(decisionStartState); 1142 LookaheadSet[] altLook = new LookaheadSet[numAlts+1]; 1143 for (int alt = 1; alt <= numAlts; alt++) { 1144 int walkAlt = 1145 decisionStartState.translateDisplayAltToWalkAlt(alt); 1146 NFAState altLeftEdge = getNFAStateForAltOfDecision(decisionStartState, walkAlt); 1147 NFAState altStartState = (NFAState)altLeftEdge.transition[0].target; 1148 //System.out.println("alt "+alt+" start state = "+altStartState.stateNumber); 1149 altLook[alt] = ll1Analyzer.LOOK(altStartState); 1150 //System.out.println("alt "+alt+": "+altLook[alt].toString(this)); 1151 } 1152 1153 // compare alt i with alt j for disjointness 1154 boolean decisionIsLL_1 = true; 1155 outer: 1156 for (int i = 1; i <= numAlts; i++) { 1157 for (int j = i+1; j <= numAlts; j++) { 1158 /* 1159 System.out.println("compare "+i+", "+j+": "+ 1160 altLook[i].toString(this)+" with "+ 1161 altLook[j].toString(this)); 1162 */ 1163 LookaheadSet collision = altLook[i].intersection(altLook[j]); 1164 if ( !collision.isNil() ) { 1165 //System.out.println("collision (non-LL(1)): "+collision.toString(this)); 1166 decisionIsLL_1 = false; 1167 break outer; 1168 } 1169 } 1170 } 1171 1172 boolean foundConfoundingPredicate = 1173 ll1Analyzer.detectConfoundingPredicates(decisionStartState); 1174 if ( decisionIsLL_1 && !foundConfoundingPredicate ) { 1175 // build an LL(1) optimized DFA with edge for each altLook[i] 1176 if ( NFAToDFAConverter.debug ) { 1177 System.out.println("decision "+decision+" is simple LL(1)"); 1178 } 1179 DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, altLook); 1180 setLookaheadDFA(decision, lookaheadDFA); 1181 updateLineColumnToLookaheadDFAMap(lookaheadDFA); 1182 return lookaheadDFA; 1183 } 1184 1185 // not LL(1) but perhaps we can solve with simplified predicate search 1186 // even if k=1 set manually, only resolve here if we have preds; i.e., 1187 // don't resolve etc... 1188 1189 /* 1190 SemanticContext visiblePredicates = 1191 ll1Analyzer.getPredicates(decisionStartState); 1192 boolean foundConfoundingPredicate = 1193 ll1Analyzer.detectConfoundingPredicates(decisionStartState); 1194 */ 1195 1196 // exit if not forced k=1 or we found a predicate situation we 1197 // can't handle: predicates in rules invoked from this decision. 1198 if ( getUserMaxLookahead(decision)!=1 || // not manually set to k=1 1199 !getAutoBacktrackMode(decision) || 1200 foundConfoundingPredicate ) 1201 { 1202 //System.out.println("trying LL(*)"); 1203 return null; 1204 } 1205 1206 List<IntervalSet> edges = new ArrayList<IntervalSet>(); 1207 for (int i = 1; i < altLook.length; i++) { 1208 LookaheadSet s = altLook[i]; 1209 edges.add((IntervalSet)s.tokenTypeSet); 1210 } 1211 List<IntervalSet> disjoint = makeEdgeSetsDisjoint(edges); 1212 //System.out.println("disjoint="+disjoint); 1213 1214 MultiMap<IntervalSet, Integer> edgeMap = new MultiMap<IntervalSet, Integer>(); 1215 for (int i = 0; i < disjoint.size(); i++) { 1216 IntervalSet ds = (IntervalSet) disjoint.get(i); 1217 for (int alt = 1; alt < altLook.length; alt++) { 1218 LookaheadSet look = altLook[alt]; 1219 if ( !ds.and(look.tokenTypeSet).isNil() ) { 1220 edgeMap.map(ds, alt); 1221 } 1222 } 1223 } 1224 //System.out.println("edge map: "+edgeMap); 1225 1226 // TODO: how do we know we covered stuff? 1227 1228 // build an LL(1) optimized DFA with edge for each altLook[i] 1229 DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, edgeMap); 1230 setLookaheadDFA(decision, lookaheadDFA); 1231 1232 // create map from line:col to decision DFA (for ANTLRWorks) 1233 updateLineColumnToLookaheadDFAMap(lookaheadDFA); 1234 1235 return lookaheadDFA; 1236 } 1237 updateLineColumnToLookaheadDFAMap(DFA lookaheadDFA)1238 private void updateLineColumnToLookaheadDFAMap(DFA lookaheadDFA) { 1239 GrammarAST decisionAST = nfa.grammar.getDecisionBlockAST(lookaheadDFA.decisionNumber); 1240 int line = decisionAST.getLine(); 1241 int col = decisionAST.getCharPositionInLine(); 1242 lineColumnToLookaheadDFAMap.put(new StringBuffer().append(line + ":") 1243 .append(col).toString(), lookaheadDFA); 1244 } 1245 makeEdgeSetsDisjoint(List<IntervalSet> edges)1246 protected List<IntervalSet> makeEdgeSetsDisjoint(List<IntervalSet> edges) { 1247 OrderedHashSet<IntervalSet> disjointSets = new OrderedHashSet<IntervalSet>(); 1248 // walk each incoming edge label/set and add to disjoint set 1249 int numEdges = edges.size(); 1250 for (int e = 0; e < numEdges; e++) { 1251 IntervalSet t = (IntervalSet) edges.get(e); 1252 if ( disjointSets.contains(t) ) { // exact set present 1253 continue; 1254 } 1255 1256 // compare t with set i for disjointness 1257 IntervalSet remainder = t; // remainder starts out as whole set to add 1258 int numDisjointElements = disjointSets.size(); 1259 for (int i = 0; i < numDisjointElements; i++) { 1260 IntervalSet s_i = (IntervalSet)disjointSets.get(i); 1261 1262 if ( t.and(s_i).isNil() ) { // nothing in common 1263 continue; 1264 } 1265 //System.out.println(label+" collides with "+rl); 1266 1267 // For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t) 1268 // (ignoring s_i-t if nil; don't put in list) 1269 1270 // Replace existing s_i with intersection since we 1271 // know that will always be a non nil character class 1272 IntervalSet intersection = (IntervalSet)s_i.and(t); 1273 disjointSets.set(i, intersection); 1274 1275 // Compute s_i-t to see what is in current set and not in incoming 1276 IntSet existingMinusNewElements = s_i.subtract(t); 1277 //System.out.println(s_i+"-"+t+"="+existingMinusNewElements); 1278 if ( !existingMinusNewElements.isNil() ) { 1279 // found a new character class, add to the end (doesn't affect 1280 // outer loop duration due to n computation a priori. 1281 disjointSets.add(existingMinusNewElements); 1282 } 1283 1284 // anything left to add to the reachableLabels? 1285 remainder = (IntervalSet)t.subtract(s_i); 1286 if ( remainder.isNil() ) { 1287 break; // nothing left to add to set. done! 1288 } 1289 1290 t = remainder; 1291 } 1292 if ( !remainder.isNil() ) { 1293 disjointSets.add(remainder); 1294 } 1295 } 1296 return disjointSets.elements(); 1297 } 1298 createLookaheadDFA(int decision, boolean wackTempStructures)1299 public DFA createLookaheadDFA(int decision, boolean wackTempStructures) { 1300 Decision d = getDecision(decision); 1301 String enclosingRule = d.startState.enclosingRule.name; 1302 Rule r = d.startState.enclosingRule; 1303 1304 //System.out.println("createLookaheadDFA(): "+enclosingRule+" dec "+decision+"; synprednames prev used "+synPredNamesUsedInDFA); 1305 NFAState decisionStartState = getDecisionNFAStartState(decision); 1306 long startDFA=0,stopDFA=0; 1307 if ( composite.watchNFAConversion ) { 1308 System.out.println("--------------------\nbuilding lookahead DFA (d=" 1309 +decisionStartState.getDecisionNumber()+") for "+ 1310 decisionStartState.getDescription()); 1311 startDFA = System.currentTimeMillis(); 1312 } 1313 1314 DFA lookaheadDFA = new DFA(decision, decisionStartState); 1315 // Retry to create a simpler DFA if analysis failed (non-LL(*), 1316 // recursion overflow, or time out). 1317 boolean failed = 1318 lookaheadDFA.probe.isNonLLStarDecision() || 1319 lookaheadDFA.probe.analysisOverflowed(); 1320 if ( failed && lookaheadDFA.okToRetryDFAWithK1() ) { 1321 // set k=1 option and try again. 1322 // First, clean up tracking stuff 1323 decisionsWhoseDFAsUsesSynPreds.remove(lookaheadDFA); 1324 // TODO: clean up synPredNamesUsedInDFA also (harder) 1325 d.blockAST.setBlockOption(this, "k", Utils.integer(1)); 1326 if ( composite.watchNFAConversion ) { 1327 System.out.print("trying decision "+decision+ 1328 " again with k=1; reason: "+ 1329 lookaheadDFA.getReasonForFailure()); 1330 } 1331 lookaheadDFA = null; // make sure other memory is "free" before redoing 1332 lookaheadDFA = new DFA(decision, decisionStartState); 1333 } 1334 1335 setLookaheadDFA(decision, lookaheadDFA); 1336 1337 if ( wackTempStructures ) { 1338 for (DFAState s : lookaheadDFA.getUniqueStates().values()) { 1339 s.reset(); 1340 } 1341 } 1342 1343 // create map from line:col to decision DFA (for ANTLRWorks) 1344 updateLineColumnToLookaheadDFAMap(lookaheadDFA); 1345 1346 if ( composite.watchNFAConversion ) { 1347 stopDFA = System.currentTimeMillis(); 1348 System.out.println("cost: "+lookaheadDFA.getNumberOfStates()+ 1349 " states, "+(int)(stopDFA-startDFA)+" ms"); 1350 } 1351 //System.out.println("after create DFA; synPredNamesUsedInDFA="+synPredNamesUsedInDFA); 1352 return lookaheadDFA; 1353 } 1354 1355 /** Terminate DFA creation (grammar analysis). 1356 */ externallyAbortNFAToDFAConversion()1357 public void externallyAbortNFAToDFAConversion() { 1358 externalAnalysisAbort = true; 1359 } 1360 NFAToDFAConversionExternallyAborted()1361 public boolean NFAToDFAConversionExternallyAborted() { 1362 return externalAnalysisAbort; 1363 } 1364 1365 /** Return a new unique integer in the token type space */ getNewTokenType()1366 public int getNewTokenType() { 1367 composite.maxTokenType++; 1368 return composite.maxTokenType; 1369 } 1370 1371 /** Define a token at a particular token type value. Blast an 1372 * old value with a new one. This is called normal grammar processsing 1373 * and during import vocab operations to set tokens with specific values. 1374 */ defineToken(String text, int tokenType)1375 public void defineToken(String text, int tokenType) { 1376 //System.out.println("defineToken("+text+", "+tokenType+")"); 1377 if ( composite.tokenIDToTypeMap.get(text)!=null ) { 1378 // already defined? Must be predefined one like EOF; 1379 // do nothing 1380 return; 1381 } 1382 // the index in the typeToTokenList table is actually shifted to 1383 // hold faux labels as you cannot have negative indices. 1384 if ( text.charAt(0)=='\'' ) { 1385 composite.stringLiteralToTypeMap.put(text, Utils.integer(tokenType)); 1386 // track in reverse index too 1387 if ( tokenType>=composite.typeToStringLiteralList.size() ) { 1388 composite.typeToStringLiteralList.setSize(tokenType+1); 1389 } 1390 composite.typeToStringLiteralList.set(tokenType, text); 1391 } 1392 else { // must be a label like ID 1393 composite.tokenIDToTypeMap.put(text, Utils.integer(tokenType)); 1394 } 1395 int index = Label.NUM_FAUX_LABELS+tokenType-1; 1396 //System.out.println("defining "+name+" token "+text+" at type="+tokenType+", index="+index); 1397 composite.maxTokenType = Math.max(composite.maxTokenType, tokenType); 1398 if ( index>=composite.typeToTokenList.size() ) { 1399 composite.typeToTokenList.setSize(index+1); 1400 } 1401 String prevToken = (String)composite.typeToTokenList.get(index); 1402 if ( prevToken==null || prevToken.charAt(0)=='\'' ) { 1403 // only record if nothing there before or if thing before was a literal 1404 composite.typeToTokenList.set(index, text); 1405 } 1406 } 1407 1408 /** Define a new rule. A new rule index is created by incrementing 1409 * ruleIndex. 1410 */ defineRule(Token ruleToken, String modifier, Map options, GrammarAST tree, GrammarAST argActionAST, int numAlts)1411 public void defineRule(Token ruleToken, 1412 String modifier, 1413 Map options, 1414 GrammarAST tree, 1415 GrammarAST argActionAST, 1416 int numAlts) 1417 { 1418 String ruleName = ruleToken.getText(); 1419 if ( getLocallyDefinedRule(ruleName)!=null ) { 1420 ErrorManager.grammarError(ErrorManager.MSG_RULE_REDEFINITION, 1421 this, ruleToken, ruleName); 1422 return; 1423 } 1424 1425 if ( (type==Grammar.PARSER||type==Grammar.TREE_PARSER) && 1426 Character.isUpperCase(ruleName.charAt(0)) ) 1427 { 1428 ErrorManager.grammarError(ErrorManager.MSG_LEXER_RULES_NOT_ALLOWED, 1429 this, ruleToken, ruleName); 1430 return; 1431 } 1432 1433 Rule r = new Rule(this, ruleName, composite.ruleIndex, numAlts); 1434 /* 1435 System.out.println("defineRule("+ruleName+",modifier="+modifier+ 1436 "): index="+r.index+", nalts="+numAlts); 1437 */ 1438 r.modifier = modifier; 1439 nameToRuleMap.put(ruleName, r); 1440 setRuleAST(ruleName, tree); 1441 r.setOptions(options, ruleToken); 1442 r.argActionAST = argActionAST; 1443 composite.ruleIndexToRuleList.setSize(composite.ruleIndex+1); 1444 composite.ruleIndexToRuleList.set(composite.ruleIndex, r); 1445 composite.ruleIndex++; 1446 if ( ruleName.startsWith(SYNPRED_RULE_PREFIX) ) { 1447 r.isSynPred = true; 1448 } 1449 } 1450 1451 /** Define a new predicate and get back its name for use in building 1452 * a semantic predicate reference to the syn pred. 1453 */ defineSyntacticPredicate(GrammarAST blockAST, String currentRuleName)1454 public String defineSyntacticPredicate(GrammarAST blockAST, 1455 String currentRuleName) 1456 { 1457 if ( nameToSynpredASTMap==null ) { 1458 nameToSynpredASTMap = new LinkedHashMap(); 1459 } 1460 String predName = 1461 SYNPRED_RULE_PREFIX+(nameToSynpredASTMap.size() + 1)+"_"+name; 1462 blockAST.setTreeEnclosingRuleNameDeeply(predName); 1463 nameToSynpredASTMap.put(predName, blockAST); 1464 return predName; 1465 } 1466 getSyntacticPredicates()1467 public LinkedHashMap getSyntacticPredicates() { 1468 return nameToSynpredASTMap; 1469 } 1470 getSyntacticPredicate(String name)1471 public GrammarAST getSyntacticPredicate(String name) { 1472 if ( nameToSynpredASTMap==null ) { 1473 return null; 1474 } 1475 return (GrammarAST)nameToSynpredASTMap.get(name); 1476 } 1477 synPredUsedInDFA(DFA dfa, SemanticContext semCtx)1478 public void synPredUsedInDFA(DFA dfa, SemanticContext semCtx) { 1479 decisionsWhoseDFAsUsesSynPreds.add(dfa); 1480 semCtx.trackUseOfSyntacticPredicates(this); // walk ctx looking for preds 1481 } 1482 1483 /* 1484 public Set<Rule> getRuleNamesVisitedDuringLOOK() { 1485 return rulesSensitiveToOtherRules; 1486 } 1487 */ 1488 1489 /** Given @scope::name {action} define it for this grammar. Later, 1490 * the code generator will ask for the actions table. For composite 1491 * grammars, make sure header action propogates down to all delegates. 1492 */ defineNamedAction(GrammarAST ampersandAST, String scope, GrammarAST nameAST, GrammarAST actionAST)1493 public void defineNamedAction(GrammarAST ampersandAST, 1494 String scope, 1495 GrammarAST nameAST, 1496 GrammarAST actionAST) 1497 { 1498 if ( scope==null ) { 1499 scope = getDefaultActionScope(type); 1500 } 1501 //System.out.println("Grammar "+name+" define @"+scope+"::"+nameAST.getText()+"{"+actionAST.getText()+"}"); 1502 String actionName = nameAST.getText(); 1503 Map<String, Object> scopeActions = getActions().get(scope); 1504 if ( scopeActions==null ) { 1505 scopeActions = new HashMap<String, Object>(); 1506 getActions().put(scope, scopeActions); 1507 } 1508 Object a = scopeActions.get(actionName); 1509 if ( a!=null ) { 1510 ErrorManager.grammarError( 1511 ErrorManager.MSG_ACTION_REDEFINITION,this, 1512 nameAST.getToken(),nameAST.getText()); 1513 } 1514 else { 1515 scopeActions.put(actionName,actionAST); 1516 } 1517 // propogate header (regardless of scope (lexer, parser, ...) ? 1518 if ( this==composite.getRootGrammar() && actionName.equals("header") ) { 1519 List<Grammar> allgrammars = composite.getRootGrammar().getDelegates(); 1520 for (Grammar delegate : allgrammars) { 1521 if ( target.isValidActionScope(delegate.type, scope) ) { 1522 //System.out.println("propogate to "+delegate.name); 1523 delegate.defineNamedAction(ampersandAST, scope, nameAST, actionAST); 1524 } 1525 } 1526 } 1527 } 1528 setSynPredGateIfNotAlready(ST gateST)1529 public void setSynPredGateIfNotAlready(ST gateST) { 1530 String scope = getDefaultActionScope(type); 1531 Map<String, Object> actionsForGrammarScope = getActions().get(scope); 1532 // if no synpredgate action set by user then set 1533 if ( (actionsForGrammarScope==null || 1534 !actionsForGrammarScope.containsKey(Grammar.SYNPREDGATE_ACTION_NAME)) ) 1535 { 1536 if ( actionsForGrammarScope==null ) { 1537 actionsForGrammarScope=new HashMap<String, Object>(); 1538 getActions().put(scope, actionsForGrammarScope); 1539 } 1540 actionsForGrammarScope.put(Grammar.SYNPREDGATE_ACTION_NAME, 1541 gateST); 1542 } 1543 } 1544 getActions()1545 public Map<String, Map<String, Object>> getActions() { 1546 return actions; 1547 } 1548 1549 /** Given a grammar type, what should be the default action scope? 1550 * If I say @members in a COMBINED grammar, for example, the 1551 * default scope should be "parser". 1552 */ getDefaultActionScope(int grammarType)1553 public String getDefaultActionScope(int grammarType) { 1554 switch (grammarType) { 1555 case Grammar.LEXER : 1556 return "lexer"; 1557 case Grammar.PARSER : 1558 case Grammar.COMBINED : 1559 return "parser"; 1560 case Grammar.TREE_PARSER : 1561 return "treeparser"; 1562 } 1563 return null; 1564 } 1565 defineLexerRuleFoundInParser(Token ruleToken, GrammarAST ruleAST)1566 public void defineLexerRuleFoundInParser(Token ruleToken, 1567 GrammarAST ruleAST) 1568 { 1569 // System.out.println("rule tree is:\n"+ruleAST.toStringTree()); 1570 /* 1571 String ruleText = tokenBuffer.toOriginalString(ruleAST.ruleStartTokenIndex, 1572 ruleAST.ruleStopTokenIndex); 1573 */ 1574 // first, create the text of the rule 1575 StringBuffer buf = new StringBuffer(); 1576 buf.append("// $ANTLR src \""); 1577 buf.append(getFileName()); 1578 buf.append("\" "); 1579 buf.append(ruleAST.getLine()); 1580 buf.append("\n"); 1581 for (int i=ruleAST.getTokenStartIndex(); 1582 i<=ruleAST.getTokenStopIndex() && i<tokenBuffer.size(); 1583 i++) 1584 { 1585 CommonToken t = (CommonToken)tokenBuffer.get(i); 1586 // undo the text deletions done by the lexer (ugh) 1587 if ( t.getType()==ANTLRParser.BLOCK ) { 1588 buf.append("("); 1589 } 1590 else if ( t.getType()==ANTLRParser.ACTION ) { 1591 buf.append("{"); 1592 buf.append(t.getText()); 1593 buf.append("}"); 1594 } 1595 else if ( t.getType()==ANTLRParser.SEMPRED || 1596 t.getType()==ANTLRParser.SYN_SEMPRED || 1597 t.getType()==ANTLRParser.GATED_SEMPRED || 1598 t.getType()==ANTLRParser.BACKTRACK_SEMPRED ) 1599 { 1600 buf.append("{"); 1601 buf.append(t.getText()); 1602 buf.append("}?"); 1603 } 1604 else if ( t.getType()==ANTLRParser.ARG_ACTION ) { 1605 buf.append("["); 1606 buf.append(t.getText()); 1607 buf.append("]"); 1608 } 1609 else { 1610 buf.append(t.getText()); 1611 } 1612 } 1613 String ruleText = buf.toString(); 1614 //System.out.println("[["+ruleText+"]]"); 1615 // now put the rule into the lexer grammar template 1616 if ( getGrammarIsRoot() ) { // don't build lexers for delegates 1617 lexerGrammarST.add("rules", ruleText); 1618 } 1619 // track this lexer rule's name 1620 composite.lexerRules.add(ruleToken.getText()); 1621 } 1622 1623 /** If someone does PLUS='+' in the parser, must make sure we get 1624 * "PLUS : '+' ;" in lexer not "T73 : '+';" 1625 */ defineLexerRuleForAliasedStringLiteral(String tokenID, String literal, int tokenType)1626 public void defineLexerRuleForAliasedStringLiteral(String tokenID, 1627 String literal, 1628 int tokenType) 1629 { 1630 if ( getGrammarIsRoot() ) { // don't build lexers for delegates 1631 //System.out.println("defineLexerRuleForAliasedStringLiteral: "+literal+" "+tokenType); 1632 lexerGrammarST.addAggr("literals.{ruleName,type,literal}", 1633 tokenID, 1634 Utils.integer(tokenType), 1635 literal); 1636 } 1637 // track this lexer rule's name 1638 composite.lexerRules.add(tokenID); 1639 } 1640 defineLexerRuleForStringLiteral(String literal, int tokenType)1641 public void defineLexerRuleForStringLiteral(String literal, int tokenType) { 1642 //System.out.println("defineLexerRuleForStringLiteral: "+literal+" "+tokenType); 1643 // compute new token name like T237 and define it as having tokenType 1644 String tokenID = computeTokenNameFromLiteral(tokenType,literal); 1645 defineToken(tokenID, tokenType); 1646 // tell implicit lexer to define a rule to match the literal 1647 if ( getGrammarIsRoot() ) { // don't build lexers for delegates 1648 lexerGrammarST.addAggr("literals.{ruleName,type,literal}", 1649 tokenID, 1650 Utils.integer(tokenType), 1651 literal); 1652 } 1653 } 1654 getLocallyDefinedRule(String ruleName)1655 public Rule getLocallyDefinedRule(String ruleName) { 1656 Rule r = nameToRuleMap.get(ruleName); 1657 return r; 1658 } 1659 getRule(String ruleName)1660 public Rule getRule(String ruleName) { 1661 Rule r = composite.getRule(ruleName); 1662 /* 1663 if ( r!=null && r.grammar != this ) { 1664 System.out.println(name+".getRule("+ruleName+")="+r); 1665 } 1666 */ 1667 return r; 1668 } 1669 getRule(String scopeName, String ruleName)1670 public Rule getRule(String scopeName, String ruleName) { 1671 if ( scopeName!=null ) { // scope override 1672 Grammar scope = composite.getGrammar(scopeName); 1673 if ( scope==null ) { 1674 return null; 1675 } 1676 return scope.getLocallyDefinedRule(ruleName); 1677 } 1678 return getRule(ruleName); 1679 } 1680 getRuleIndex(String scopeName, String ruleName)1681 public int getRuleIndex(String scopeName, String ruleName) { 1682 Rule r = getRule(scopeName, ruleName); 1683 if ( r!=null ) { 1684 return r.index; 1685 } 1686 return INVALID_RULE_INDEX; 1687 } 1688 getRuleIndex(String ruleName)1689 public int getRuleIndex(String ruleName) { 1690 return getRuleIndex(null, ruleName); 1691 } 1692 getRuleName(int ruleIndex)1693 public String getRuleName(int ruleIndex) { 1694 Rule r = composite.ruleIndexToRuleList.get(ruleIndex); 1695 if ( r!=null ) { 1696 return r.name; 1697 } 1698 return null; 1699 } 1700 1701 /** Should codegen.g gen rule for ruleName? 1702 * If synpred, only gen if used in a DFA. 1703 * If regular rule, only gen if not overridden in delegator 1704 * Always gen Tokens rule though. 1705 */ generateMethodForRule(String ruleName)1706 public boolean generateMethodForRule(String ruleName) { 1707 if ( ruleName.equals(ARTIFICIAL_TOKENS_RULENAME) ) { 1708 // always generate Tokens rule to satisfy lexer interface 1709 // but it may have no alternatives. 1710 return true; 1711 } 1712 if ( overriddenRules.contains(ruleName) ) { 1713 // don't generate any overridden rules 1714 return false; 1715 } 1716 // generate if non-synpred or synpred used in a DFA 1717 Rule r = getLocallyDefinedRule(ruleName); 1718 return !r.isSynPred || 1719 (r.isSynPred&&synPredNamesUsedInDFA.contains(ruleName)); 1720 } 1721 defineGlobalScope(String name, Token scopeAction)1722 public AttributeScope defineGlobalScope(String name, Token scopeAction) { 1723 AttributeScope scope = new AttributeScope(this, name, scopeAction); 1724 scopes.put(name,scope); 1725 return scope; 1726 } 1727 createReturnScope(String ruleName, Token retAction)1728 public AttributeScope createReturnScope(String ruleName, Token retAction) { 1729 AttributeScope scope = new AttributeScope(this, ruleName, retAction); 1730 scope.isReturnScope = true; 1731 return scope; 1732 } 1733 createRuleScope(String ruleName, Token scopeAction)1734 public AttributeScope createRuleScope(String ruleName, Token scopeAction) { 1735 AttributeScope scope = new AttributeScope(this, ruleName, scopeAction); 1736 scope.isDynamicRuleScope = true; 1737 return scope; 1738 } 1739 createParameterScope(String ruleName, Token argAction)1740 public AttributeScope createParameterScope(String ruleName, Token argAction) { 1741 AttributeScope scope = new AttributeScope(this, ruleName, argAction); 1742 scope.isParameterScope = true; 1743 return scope; 1744 } 1745 1746 /** Get a global scope */ getGlobalScope(String name)1747 public AttributeScope getGlobalScope(String name) { 1748 return (AttributeScope)scopes.get(name); 1749 } 1750 getGlobalScopes()1751 public Map getGlobalScopes() { 1752 return scopes; 1753 } 1754 1755 /** Define a label defined in a rule r; check the validity then ask the 1756 * Rule object to actually define it. 1757 */ defineLabel(Rule r, Token label, GrammarAST element, int type)1758 protected void defineLabel(Rule r, Token label, GrammarAST element, int type) { 1759 boolean err = nameSpaceChecker.checkForLabelTypeMismatch(r, label, type); 1760 if ( err ) { 1761 return; 1762 } 1763 r.defineLabel(label, element, type); 1764 } 1765 defineTokenRefLabel(String ruleName, Token label, GrammarAST tokenRef)1766 public void defineTokenRefLabel(String ruleName, 1767 Token label, 1768 GrammarAST tokenRef) 1769 { 1770 Rule r = getLocallyDefinedRule(ruleName); 1771 if ( r!=null ) { 1772 if ( type==LEXER && 1773 (tokenRef.getType()==ANTLRParser.CHAR_LITERAL|| 1774 tokenRef.getType()==ANTLRParser.BLOCK|| 1775 tokenRef.getType()==ANTLRParser.NOT|| 1776 tokenRef.getType()==ANTLRParser.CHAR_RANGE|| 1777 tokenRef.getType()==ANTLRParser.WILDCARD)) 1778 { 1779 defineLabel(r, label, tokenRef, CHAR_LABEL); 1780 } 1781 else { 1782 defineLabel(r, label, tokenRef, TOKEN_LABEL); 1783 } 1784 } 1785 } 1786 defineWildcardTreeLabel(String ruleName, Token label, GrammarAST tokenRef)1787 public void defineWildcardTreeLabel(String ruleName, 1788 Token label, 1789 GrammarAST tokenRef) 1790 { 1791 Rule r = getLocallyDefinedRule(ruleName); 1792 if ( r!=null ) { 1793 defineLabel(r, label, tokenRef, WILDCARD_TREE_LABEL); 1794 } 1795 } 1796 defineWildcardTreeListLabel(String ruleName, Token label, GrammarAST tokenRef)1797 public void defineWildcardTreeListLabel(String ruleName, 1798 Token label, 1799 GrammarAST tokenRef) 1800 { 1801 Rule r = getLocallyDefinedRule(ruleName); 1802 if ( r!=null ) { 1803 defineLabel(r, label, tokenRef, WILDCARD_TREE_LIST_LABEL); 1804 } 1805 } 1806 defineRuleRefLabel(String ruleName, Token label, GrammarAST ruleRef)1807 public void defineRuleRefLabel(String ruleName, 1808 Token label, 1809 GrammarAST ruleRef) 1810 { 1811 Rule r = getLocallyDefinedRule(ruleName); 1812 if ( r!=null ) { 1813 defineLabel(r, label, ruleRef, RULE_LABEL); 1814 } 1815 } 1816 defineTokenListLabel(String ruleName, Token label, GrammarAST element)1817 public void defineTokenListLabel(String ruleName, 1818 Token label, 1819 GrammarAST element) 1820 { 1821 Rule r = getLocallyDefinedRule(ruleName); 1822 if ( r!=null ) { 1823 defineLabel(r, label, element, TOKEN_LIST_LABEL); 1824 } 1825 } 1826 defineRuleListLabel(String ruleName, Token label, GrammarAST element)1827 public void defineRuleListLabel(String ruleName, 1828 Token label, 1829 GrammarAST element) 1830 { 1831 Rule r = getLocallyDefinedRule(ruleName); 1832 if ( r!=null ) { 1833 if ( !r.getHasMultipleReturnValues() ) { 1834 ErrorManager.grammarError( 1835 ErrorManager.MSG_LIST_LABEL_INVALID_UNLESS_RETVAL_STRUCT,this, 1836 label,label.getText()); 1837 } 1838 defineLabel(r, label, element, RULE_LIST_LABEL); 1839 } 1840 } 1841 1842 /** Given a set of all rewrite elements on right of ->, filter for 1843 * label types such as Grammar.TOKEN_LABEL, Grammar.TOKEN_LIST_LABEL, ... 1844 * Return a displayable token type name computed from the GrammarAST. 1845 */ getLabels(Set<GrammarAST> rewriteElements, int labelType)1846 public Set<String> getLabels(Set<GrammarAST> rewriteElements, int labelType) { 1847 Set<String> labels = new HashSet<String>(); 1848 for (GrammarAST el : rewriteElements) { 1849 if ( el.getType()==ANTLRParser.LABEL ) { 1850 String labelName = el.getText(); 1851 Rule enclosingRule = getLocallyDefinedRule(el.enclosingRuleName); 1852 if ( enclosingRule==null ) continue; 1853 LabelElementPair pair = enclosingRule.getLabel(labelName); 1854 /* 1855 // if tree grammar and we have a wildcard, only notice it 1856 // when looking for rule labels not token label. x=. should 1857 // look like a rule ref since could be subtree. 1858 if ( type==TREE_PARSER && pair!=null && 1859 pair.elementRef.getType()==ANTLRParser.WILDCARD ) 1860 { 1861 if ( labelType==WILDCARD_TREE_LABEL ) { 1862 labels.add(labelName); 1863 continue; 1864 } 1865 else continue; 1866 } 1867 */ 1868 // if valid label and type is what we're looking for 1869 // and not ref to old value val $rule, add to list 1870 if ( pair!=null && pair.type==labelType && 1871 !labelName.equals(el.enclosingRuleName) ) 1872 { 1873 labels.add(labelName); 1874 } 1875 } 1876 } 1877 return labels; 1878 } 1879 1880 /** Before generating code, we examine all actions that can have 1881 * $x.y and $y stuff in them because some code generation depends on 1882 * Rule.referencedPredefinedRuleAttributes. I need to remove unused 1883 * rule labels for example. 1884 */ examineAllExecutableActions()1885 protected void examineAllExecutableActions() { 1886 Collection rules = getRules(); 1887 for (Iterator it = rules.iterator(); it.hasNext();) { 1888 Rule r = (Rule) it.next(); 1889 // walk all actions within the rule elements, args, and exceptions 1890 List<GrammarAST> actions = r.getInlineActions(); 1891 for (int i = 0; i < actions.size(); i++) { 1892 GrammarAST actionAST = (GrammarAST) actions.get(i); 1893 ActionAnalysis sniffer = 1894 new ActionAnalysis(this, r.name, actionAST); 1895 sniffer.analyze(); 1896 } 1897 // walk any named actions like @init, @after 1898 Collection<GrammarAST> namedActions = r.getActions().values(); 1899 for (Iterator it2 = namedActions.iterator(); it2.hasNext();) { 1900 GrammarAST actionAST = (GrammarAST) it2.next(); 1901 ActionAnalysis sniffer = 1902 new ActionAnalysis(this, r.name, actionAST); 1903 sniffer.analyze(); 1904 } 1905 } 1906 } 1907 1908 /** Remove all labels on rule refs whose target rules have no return value. 1909 * Do this for all rules in grammar. 1910 */ checkAllRulesForUselessLabels()1911 public void checkAllRulesForUselessLabels() { 1912 if ( type==LEXER ) { 1913 return; 1914 } 1915 Set rules = nameToRuleMap.keySet(); 1916 for (Iterator it = rules.iterator(); it.hasNext();) { 1917 String ruleName = (String) it.next(); 1918 Rule r = getRule(ruleName); 1919 removeUselessLabels(r.getRuleLabels()); 1920 removeUselessLabels(r.getRuleListLabels()); 1921 } 1922 } 1923 1924 /** A label on a rule is useless if the rule has no return value, no 1925 * tree or template output, and it is not referenced in an action. 1926 */ removeUselessLabels(Map ruleToElementLabelPairMap)1927 protected void removeUselessLabels(Map ruleToElementLabelPairMap) { 1928 if ( ruleToElementLabelPairMap==null ) { 1929 return; 1930 } 1931 Collection labels = ruleToElementLabelPairMap.values(); 1932 List kill = new ArrayList(); 1933 for (Iterator labelit = labels.iterator(); labelit.hasNext();) { 1934 LabelElementPair pair = (LabelElementPair) labelit.next(); 1935 Rule refdRule = getRule(pair.elementRef.getText()); 1936 if ( refdRule!=null && !refdRule.getHasReturnValue() && !pair.actionReferencesLabel ) { 1937 //System.out.println(pair.label.getText()+" is useless"); 1938 kill.add(pair.label.getText()); 1939 } 1940 } 1941 for (int i = 0; i < kill.size(); i++) { 1942 String labelToKill = (String) kill.get(i); 1943 // System.out.println("kill "+labelToKill); 1944 ruleToElementLabelPairMap.remove(labelToKill); 1945 } 1946 } 1947 1948 /** Track a rule reference within an outermost alt of a rule. Used 1949 * at the moment to decide if $ruleref refers to a unique rule ref in 1950 * the alt. Rewrite rules force tracking of all rule AST results. 1951 * 1952 * This data is also used to verify that all rules have been defined. 1953 */ altReferencesRule(String enclosingRuleName, GrammarAST refScopeAST, GrammarAST refAST, int outerAltNum)1954 public void altReferencesRule(String enclosingRuleName, 1955 GrammarAST refScopeAST, 1956 GrammarAST refAST, 1957 int outerAltNum) 1958 { 1959 /* Do nothing for now; not sure need; track S.x as x 1960 String scope = null; 1961 Grammar scopeG = null; 1962 if ( refScopeAST!=null ) { 1963 if ( !scopedRuleRefs.contains(refScopeAST) ) { 1964 scopedRuleRefs.add(refScopeAST); 1965 } 1966 scope = refScopeAST.getText(); 1967 } 1968 */ 1969 Rule r = getRule(enclosingRuleName); 1970 if ( r==null ) { 1971 return; // no error here; see NameSpaceChecker 1972 } 1973 r.trackRuleReferenceInAlt(refAST, outerAltNum); 1974 Token refToken = refAST.getToken(); 1975 if ( !ruleRefs.contains(refAST) ) { 1976 ruleRefs.add(refAST); 1977 } 1978 } 1979 1980 /** Track a token reference within an outermost alt of a rule. Used 1981 * to decide if $tokenref refers to a unique token ref in 1982 * the alt. Does not track literals! 1983 * 1984 * Rewrite rules force tracking of all tokens. 1985 */ altReferencesTokenID(String ruleName, GrammarAST refAST, int outerAltNum)1986 public void altReferencesTokenID(String ruleName, GrammarAST refAST, int outerAltNum) { 1987 Rule r = getLocallyDefinedRule(ruleName); 1988 if ( r==null ) { 1989 return; 1990 } 1991 r.trackTokenReferenceInAlt(refAST, outerAltNum); 1992 if ( !tokenIDRefs.contains(refAST.getToken()) ) { 1993 tokenIDRefs.add(refAST.getToken()); 1994 } 1995 } 1996 1997 /** To yield smaller, more readable code, track which rules have their 1998 * predefined attributes accessed. If the rule has no user-defined 1999 * return values, then don't generate the return value scope classes 2000 * etc... Make the rule have void return value. Don't track for lexer 2001 * rules. 2002 */ referenceRuleLabelPredefinedAttribute(String ruleName)2003 public void referenceRuleLabelPredefinedAttribute(String ruleName) { 2004 Rule r = getRule(ruleName); 2005 if ( r!=null && type!=LEXER ) { 2006 // indicate that an action ref'd an attr unless it's in a lexer 2007 // so that $ID.text refs don't force lexer rules to define 2008 // return values...Token objects are created by the caller instead. 2009 r.referencedPredefinedRuleAttributes = true; 2010 } 2011 } 2012 checkAllRulesForLeftRecursion()2013 public List checkAllRulesForLeftRecursion() { 2014 return sanity.checkAllRulesForLeftRecursion(); 2015 } 2016 2017 /** Return a list of left-recursive rules; no analysis can be done 2018 * successfully on these. Useful to skip these rules then and also 2019 * for ANTLRWorks to highlight them. 2020 */ getLeftRecursiveRules()2021 public Set<Rule> getLeftRecursiveRules() { 2022 if ( nfa==null ) { 2023 buildNFA(); 2024 } 2025 if ( leftRecursiveRules!=null ) { 2026 return leftRecursiveRules; 2027 } 2028 sanity.checkAllRulesForLeftRecursion(); 2029 return leftRecursiveRules; 2030 } 2031 checkRuleReference(GrammarAST scopeAST, GrammarAST refAST, GrammarAST argsAST, String currentRuleName)2032 public void checkRuleReference(GrammarAST scopeAST, 2033 GrammarAST refAST, 2034 GrammarAST argsAST, 2035 String currentRuleName) 2036 { 2037 sanity.checkRuleReference(scopeAST, refAST, argsAST, currentRuleName); 2038 } 2039 2040 /** Rules like "a : ;" and "a : {...} ;" should not generate 2041 * try/catch blocks for RecognitionException. To detect this 2042 * it's probably ok to just look for any reference to an atom 2043 * that can match some input. W/o that, the rule is unlikey to have 2044 * any else. 2045 */ isEmptyRule(GrammarAST block)2046 public boolean isEmptyRule(GrammarAST block) { 2047 GrammarAST aTokenRefNode = 2048 block.findFirstType(ANTLRParser.TOKEN_REF); 2049 GrammarAST aStringLiteralRefNode = 2050 block.findFirstType(ANTLRParser.STRING_LITERAL); 2051 GrammarAST aCharLiteralRefNode = 2052 block.findFirstType(ANTLRParser.CHAR_LITERAL); 2053 GrammarAST aWildcardRefNode = 2054 block.findFirstType(ANTLRParser.WILDCARD); 2055 GrammarAST aRuleRefNode = 2056 block.findFirstType(ANTLRParser.RULE_REF); 2057 if ( aTokenRefNode==null&& 2058 aStringLiteralRefNode==null&& 2059 aCharLiteralRefNode==null&& 2060 aWildcardRefNode==null&& 2061 aRuleRefNode==null ) 2062 { 2063 return true; 2064 } 2065 return false; 2066 } 2067 isAtomTokenType(int ttype)2068 public boolean isAtomTokenType(int ttype) { 2069 return ttype == ANTLRParser.WILDCARD|| 2070 ttype == ANTLRParser.CHAR_LITERAL|| 2071 ttype == ANTLRParser.CHAR_RANGE|| 2072 ttype == ANTLRParser.STRING_LITERAL|| 2073 ttype == ANTLRParser.NOT|| 2074 (type != LEXER && ttype == ANTLRParser.TOKEN_REF); 2075 } 2076 getTokenType(String tokenName)2077 public int getTokenType(String tokenName) { 2078 Integer I = null; 2079 if ( tokenName.charAt(0)=='\'') { 2080 I = (Integer)composite.stringLiteralToTypeMap.get(tokenName); 2081 } 2082 else { // must be a label like ID 2083 I = (Integer)composite.tokenIDToTypeMap.get(tokenName); 2084 } 2085 int i = (I!=null)?I.intValue():Label.INVALID; 2086 //System.out.println("grammar type "+type+" "+tokenName+"->"+i); 2087 return i; 2088 } 2089 2090 /** Get the list of tokens that are IDs like BLOCK and LPAREN */ getTokenIDs()2091 public Set getTokenIDs() { 2092 return composite.tokenIDToTypeMap.keySet(); 2093 } 2094 2095 /** Return an ordered integer list of token types that have no 2096 * corresponding token ID like INT or KEYWORD_BEGIN; for stuff 2097 * like 'begin'. 2098 */ getTokenTypesWithoutID()2099 public Collection getTokenTypesWithoutID() { 2100 List types = new ArrayList(); 2101 for (int t =Label.MIN_TOKEN_TYPE; t<=getMaxTokenType(); t++) { 2102 String name = getTokenDisplayName(t); 2103 if ( name.charAt(0)=='\'' ) { 2104 types.add(Utils.integer(t)); 2105 } 2106 } 2107 return types; 2108 } 2109 2110 /** Get a list of all token IDs and literals that have an associated 2111 * token type. 2112 */ getTokenDisplayNames()2113 public Set<String> getTokenDisplayNames() { 2114 Set<String> names = new HashSet<String>(); 2115 for (int t =Label.MIN_TOKEN_TYPE; t <=getMaxTokenType(); t++) { 2116 names.add(getTokenDisplayName(t)); 2117 } 2118 return names; 2119 } 2120 2121 /** Given a literal like (the 3 char sequence with single quotes) 'a', 2122 * return the int value of 'a'. Convert escape sequences here also. 2123 * ANTLR's antlr.g parser does not convert escape sequences. 2124 * 2125 * 11/26/2005: I changed literals to always be '...' even for strings. 2126 * This routine still works though. 2127 */ getCharValueFromGrammarCharLiteral(String literal)2128 public static int getCharValueFromGrammarCharLiteral(String literal) { 2129 switch ( literal.length() ) { 2130 case 3 : 2131 // 'x' 2132 return literal.charAt(1); // no escape char 2133 case 4 : 2134 // '\x' (antlr lexer will catch invalid char) 2135 if ( Character.isDigit(literal.charAt(2)) ) { 2136 ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR, 2137 "invalid char literal: "+literal); 2138 return -1; 2139 } 2140 int escChar = literal.charAt(2); 2141 int charVal = ANTLRLiteralEscapedCharValue[escChar]; 2142 if ( charVal==0 ) { 2143 // Unnecessary escapes like '\{' should just yield { 2144 return escChar; 2145 } 2146 return charVal; 2147 case 8 : 2148 // '\u1234' 2149 String unicodeChars = literal.substring(3,literal.length()-1); 2150 return Integer.parseInt(unicodeChars, 16); 2151 default : 2152 ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR, 2153 "invalid char literal: "+literal); 2154 return -1; 2155 } 2156 } 2157 2158 /** ANTLR does not convert escape sequences during the parse phase because 2159 * it could not know how to print String/char literals back out when 2160 * printing grammars etc... Someone in China might use the real unicode 2161 * char in a literal as it will display on their screen; when printing 2162 * back out, I could not know whether to display or use a unicode escape. 2163 * 2164 * This routine converts a string literal with possible escape sequences 2165 * into a pure string of 16-bit char values. Escapes and unicode \u0000 2166 * specs are converted to pure chars. return in a buffer; people may 2167 * want to walk/manipulate further. 2168 * 2169 * The NFA construction routine must know the actual char values. 2170 */ getUnescapedStringFromGrammarStringLiteral(String literal)2171 public static StringBuffer getUnescapedStringFromGrammarStringLiteral(String literal) { 2172 //System.out.println("escape: ["+literal+"]"); 2173 StringBuffer buf = new StringBuffer(); 2174 int last = literal.length()-1; // skip quotes on outside 2175 for (int i=1; i<last; i++) { 2176 char c = literal.charAt(i); 2177 if ( c=='\\' ) { 2178 i++; 2179 c = literal.charAt(i); 2180 if ( Character.toUpperCase(c)=='U' ) { 2181 // \u0000 2182 i++; 2183 String unicodeChars = literal.substring(i,i+4); 2184 // parse the unicode 16 bit hex value 2185 int val = Integer.parseInt(unicodeChars, 16); 2186 i+=4-1; // loop will inc by 1; only jump 3 then 2187 buf.append((char)val); 2188 } 2189 else if ( Character.isDigit(c) ) { 2190 ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR, 2191 "invalid char literal: "+literal); 2192 buf.append("\\"+(char)c); 2193 } 2194 else { 2195 buf.append((char)ANTLRLiteralEscapedCharValue[c]); // normal \x escape 2196 } 2197 } 2198 else { 2199 buf.append(c); // simple char x 2200 } 2201 } 2202 //System.out.println("string: ["+buf.toString()+"]"); 2203 return buf; 2204 } 2205 2206 /** Pull your token definitions from an existing grammar in memory. 2207 * You must use Grammar() ctor then this method then setGrammarContent() 2208 * to make this work. This was useful primarily for testing and 2209 * interpreting grammars until I added import grammar functionality. 2210 * When you import a grammar you implicitly import its vocabulary as well 2211 * and keep the same token type values. 2212 * 2213 * Returns the max token type found. 2214 */ importTokenVocabulary(Grammar importFromGr)2215 public int importTokenVocabulary(Grammar importFromGr) { 2216 Set importedTokenIDs = importFromGr.getTokenIDs(); 2217 for (Iterator it = importedTokenIDs.iterator(); it.hasNext();) { 2218 String tokenID = (String) it.next(); 2219 int tokenType = importFromGr.getTokenType(tokenID); 2220 composite.maxTokenType = Math.max(composite.maxTokenType,tokenType); 2221 if ( tokenType>=Label.MIN_TOKEN_TYPE ) { 2222 //System.out.println("import token from grammar "+tokenID+"="+tokenType); 2223 defineToken(tokenID, tokenType); 2224 } 2225 } 2226 return composite.maxTokenType; // return max found 2227 } 2228 2229 /** Import the rules/tokens of a delegate grammar. All delegate grammars are 2230 * read during the ctor of first Grammar created. 2231 * 2232 * Do not create NFA here because NFA construction needs to hook up with 2233 * overridden rules in delegation root grammar. 2234 */ importGrammar(GrammarAST grammarNameAST, String label)2235 public void importGrammar(GrammarAST grammarNameAST, String label) { 2236 String grammarName = grammarNameAST.getText(); 2237 //System.out.println("import "+gfile.getName()); 2238 String gname = grammarName + GRAMMAR_FILE_EXTENSION; 2239 BufferedReader br = null; 2240 try { 2241 String fullName = tool.getLibraryFile(gname); 2242 FileReader fr = new FileReader(fullName); 2243 br = new BufferedReader(fr); 2244 Grammar delegateGrammar = null; 2245 delegateGrammar = new Grammar(tool, gname, composite); 2246 delegateGrammar.label = label; 2247 2248 addDelegateGrammar(delegateGrammar); 2249 2250 delegateGrammar.parseAndBuildAST(br); 2251 delegateGrammar.addRulesForSyntacticPredicates(); 2252 if ( !validImport(delegateGrammar) ) { 2253 ErrorManager.grammarError(ErrorManager.MSG_INVALID_IMPORT, 2254 this, 2255 grammarNameAST.token, 2256 this, 2257 delegateGrammar); 2258 return; 2259 } 2260 if ( this.type==COMBINED && 2261 (delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[LEXER])|| 2262 delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[PARSER])) ) 2263 { 2264 ErrorManager.grammarError(ErrorManager.MSG_IMPORT_NAME_CLASH, 2265 this, 2266 grammarNameAST.token, 2267 this, 2268 delegateGrammar); 2269 return; 2270 } 2271 if ( delegateGrammar.grammarTree!=null ) { 2272 // we have a valid grammar 2273 // deal with combined grammars 2274 if ( delegateGrammar.type == LEXER && this.type == COMBINED ) { 2275 // ooops, we wasted some effort; tell lexer to read it in 2276 // later 2277 lexerGrammarST.add("imports", grammarName); 2278 // but, this parser grammar will need the vocab 2279 // so add to composite anyway so we suck in the tokens later 2280 } 2281 } 2282 //System.out.println("Got grammar:\n"+delegateGrammar); 2283 } 2284 catch (IOException ioe) { 2285 ErrorManager.error(ErrorManager.MSG_CANNOT_OPEN_FILE, 2286 gname, 2287 ioe); 2288 } 2289 finally { 2290 if ( br!=null ) { 2291 try { 2292 br.close(); 2293 } 2294 catch (IOException ioe) { 2295 ErrorManager.error(ErrorManager.MSG_CANNOT_CLOSE_FILE, 2296 gname, 2297 ioe); 2298 } 2299 } 2300 } 2301 } 2302 2303 /** add new delegate to composite tree */ addDelegateGrammar(Grammar delegateGrammar)2304 protected void addDelegateGrammar(Grammar delegateGrammar) { 2305 CompositeGrammarTree t = composite.delegateGrammarTreeRoot.findNode(this); 2306 t.addChild(new CompositeGrammarTree(delegateGrammar)); 2307 // make sure new grammar shares this composite 2308 delegateGrammar.composite = this.composite; 2309 } 2310 2311 /** Load a vocab file <vocabName>.tokens and return max token type found. */ importTokenVocabulary(GrammarAST tokenVocabOptionAST, String vocabName)2312 public int importTokenVocabulary(GrammarAST tokenVocabOptionAST, 2313 String vocabName) 2314 { 2315 if ( !getGrammarIsRoot() ) { 2316 ErrorManager.grammarWarning(ErrorManager.MSG_TOKEN_VOCAB_IN_DELEGATE, 2317 this, 2318 tokenVocabOptionAST.token, 2319 name); 2320 return composite.maxTokenType; 2321 } 2322 2323 File fullFile = tool.getImportedVocabFile(vocabName); 2324 try { 2325 FileReader fr = new FileReader(fullFile); 2326 BufferedReader br = new BufferedReader(fr); 2327 StreamTokenizer tokenizer = new StreamTokenizer(br); 2328 tokenizer.parseNumbers(); 2329 tokenizer.wordChars('_', '_'); 2330 tokenizer.eolIsSignificant(true); 2331 tokenizer.slashSlashComments(true); 2332 tokenizer.slashStarComments(true); 2333 tokenizer.ordinaryChar('='); 2334 tokenizer.quoteChar('\''); 2335 tokenizer.whitespaceChars(' ',' '); 2336 tokenizer.whitespaceChars('\t','\t'); 2337 int lineNum = 1; 2338 int token = tokenizer.nextToken(); 2339 while (token != StreamTokenizer.TT_EOF) { 2340 String tokenID; 2341 if ( token == StreamTokenizer.TT_WORD ) { 2342 tokenID = tokenizer.sval; 2343 } 2344 else if ( token == '\'' ) { 2345 tokenID = "'"+tokenizer.sval+"'"; 2346 } 2347 else { 2348 ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR, 2349 vocabName+CodeGenerator.VOCAB_FILE_EXTENSION, 2350 Utils.integer(lineNum)); 2351 while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;} 2352 token = tokenizer.nextToken(); 2353 continue; 2354 } 2355 token = tokenizer.nextToken(); 2356 if ( token != '=' ) { 2357 ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR, 2358 vocabName+CodeGenerator.VOCAB_FILE_EXTENSION, 2359 Utils.integer(lineNum)); 2360 while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;} 2361 token = tokenizer.nextToken(); 2362 continue; 2363 } 2364 token = tokenizer.nextToken(); // skip '=' 2365 if ( token != StreamTokenizer.TT_NUMBER ) { 2366 ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR, 2367 vocabName+CodeGenerator.VOCAB_FILE_EXTENSION, 2368 Utils.integer(lineNum)); 2369 while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;} 2370 token = tokenizer.nextToken(); 2371 continue; 2372 } 2373 int tokenType = (int)tokenizer.nval; 2374 token = tokenizer.nextToken(); 2375 //System.out.println("import "+tokenID+"="+tokenType); 2376 composite.maxTokenType = Math.max(composite.maxTokenType,tokenType); 2377 defineToken(tokenID, tokenType); 2378 lineNum++; 2379 if ( token != StreamTokenizer.TT_EOL ) { 2380 ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR, 2381 vocabName+CodeGenerator.VOCAB_FILE_EXTENSION, 2382 Utils.integer(lineNum)); 2383 while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;} 2384 token = tokenizer.nextToken(); 2385 continue; 2386 } 2387 token = tokenizer.nextToken(); // skip newline 2388 } 2389 br.close(); 2390 } 2391 catch (FileNotFoundException fnfe) { 2392 ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE, 2393 fullFile); 2394 } 2395 catch (IOException ioe) { 2396 ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE, 2397 fullFile, 2398 ioe); 2399 } 2400 catch (Exception e) { 2401 ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE, 2402 fullFile, 2403 e); 2404 } 2405 return composite.maxTokenType; 2406 } 2407 2408 /** Given a token type, get a meaningful name for it such as the ID 2409 * or string literal. If this is a lexer and the ttype is in the 2410 * char vocabulary, compute an ANTLR-valid (possibly escaped) char literal. 2411 */ getTokenDisplayName(int ttype)2412 public String getTokenDisplayName(int ttype) { 2413 String tokenName = null; 2414 int index=0; 2415 // inside any target's char range and is lexer grammar? 2416 if ( this.type==LEXER && 2417 ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE ) 2418 { 2419 return getANTLRCharLiteralForChar(ttype); 2420 } 2421 // faux label? 2422 else if ( ttype<0 ) { 2423 tokenName = (String)composite.typeToTokenList.get(Label.NUM_FAUX_LABELS+ttype); 2424 } 2425 else { 2426 // compute index in typeToTokenList for ttype 2427 index = ttype-1; // normalize to 0..n-1 2428 index += Label.NUM_FAUX_LABELS; // jump over faux tokens 2429 2430 if ( index<composite.typeToTokenList.size() ) { 2431 tokenName = (String)composite.typeToTokenList.get(index); 2432 if ( tokenName!=null && 2433 tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) ) 2434 { 2435 tokenName = composite.typeToStringLiteralList.get(ttype); 2436 } 2437 } 2438 else { 2439 tokenName = String.valueOf(ttype); 2440 } 2441 } 2442 //System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName); 2443 return tokenName; 2444 } 2445 2446 /** Get the list of ANTLR String literals */ getStringLiterals()2447 public Set<String> getStringLiterals() { 2448 return composite.stringLiteralToTypeMap.keySet(); 2449 } 2450 getGrammarTypeString()2451 public String getGrammarTypeString() { 2452 return grammarTypeToString[type]; 2453 } 2454 getGrammarMaxLookahead()2455 public int getGrammarMaxLookahead() { 2456 if ( global_k>=0 ) { 2457 return global_k; 2458 } 2459 Object k = getOption("k"); 2460 if ( k==null ) { 2461 global_k = 0; 2462 } 2463 else if (k instanceof Integer) { 2464 Integer kI = (Integer)k; 2465 global_k = kI.intValue(); 2466 } 2467 else { 2468 // must be String "*" 2469 if ( k.equals("*") ) { // this the default anyway 2470 global_k = 0; 2471 } 2472 } 2473 return global_k; 2474 } 2475 2476 /** Save the option key/value pair and process it; return the key 2477 * or null if invalid option. 2478 */ setOption(String key, Object value, Token optionsStartToken)2479 public String setOption(String key, Object value, Token optionsStartToken) { 2480 if ( legalOption(key) ) { 2481 ErrorManager.grammarError(ErrorManager.MSG_ILLEGAL_OPTION, 2482 this, 2483 optionsStartToken, 2484 key); 2485 return null; 2486 } 2487 if ( !optionIsValid(key, value) ) { 2488 return null; 2489 } 2490 if ( key.equals("backtrack") && value.toString().equals("true") ) { 2491 composite.getRootGrammar().atLeastOneBacktrackOption = true; 2492 } 2493 if ( options==null ) { 2494 options = new HashMap(); 2495 } 2496 options.put(key, value); 2497 return key; 2498 } 2499 legalOption(String key)2500 public boolean legalOption(String key) { 2501 switch ( type ) { 2502 case LEXER : 2503 return !legalLexerOptions.contains(key); 2504 case PARSER : 2505 return !legalParserOptions.contains(key); 2506 case TREE_PARSER : 2507 return !legalTreeParserOptions.contains(key); 2508 default : 2509 return !legalParserOptions.contains(key); 2510 } 2511 } 2512 setOptions(Map options, Token optionsStartToken)2513 public void setOptions(Map options, Token optionsStartToken) { 2514 if ( options==null ) { 2515 this.options = null; 2516 return; 2517 } 2518 Set keys = options.keySet(); 2519 for (Iterator it = keys.iterator(); it.hasNext();) { 2520 String optionName = (String) it.next(); 2521 Object optionValue = options.get(optionName); 2522 String stored=setOption(optionName, optionValue, optionsStartToken); 2523 if ( stored==null ) { 2524 it.remove(); 2525 } 2526 } 2527 } 2528 getOption(String key)2529 public Object getOption(String key) { 2530 return composite.getOption(key); 2531 } 2532 getLocallyDefinedOption(String key)2533 public Object getLocallyDefinedOption(String key) { 2534 Object value = null; 2535 if ( options!=null ) { 2536 value = options.get(key); 2537 } 2538 if ( value==null ) { 2539 value = defaultOptions.get(key); 2540 } 2541 return value; 2542 } 2543 getBlockOption(GrammarAST blockAST, String key)2544 public Object getBlockOption(GrammarAST blockAST, String key) { 2545 String v = (String)blockAST.getBlockOption(key); 2546 if ( v!=null ) { 2547 return v; 2548 } 2549 if ( type==Grammar.LEXER ) { 2550 return defaultLexerBlockOptions.get(key); 2551 } 2552 return defaultBlockOptions.get(key); 2553 } 2554 getUserMaxLookahead(int decision)2555 public int getUserMaxLookahead(int decision) { 2556 int user_k = 0; 2557 GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decision); 2558 Object k = blockAST.getBlockOption("k"); 2559 if ( k==null ) { 2560 user_k = nfa.grammar.getGrammarMaxLookahead(); 2561 return user_k; 2562 } 2563 if (k instanceof Integer) { 2564 Integer kI = (Integer)k; 2565 user_k = kI.intValue(); 2566 } 2567 else { 2568 // must be String "*" 2569 if ( k.equals("*") ) { 2570 user_k = 0; 2571 } 2572 } 2573 return user_k; 2574 } 2575 getAutoBacktrackMode(int decision)2576 public boolean getAutoBacktrackMode(int decision) { 2577 NFAState decisionNFAStartState = getDecisionNFAStartState(decision); 2578 String autoBacktrack = 2579 (String)getBlockOption(decisionNFAStartState.associatedASTNode, "backtrack"); 2580 2581 if ( autoBacktrack==null ) { 2582 autoBacktrack = (String)nfa.grammar.getOption("backtrack"); 2583 } 2584 return autoBacktrack!=null&&autoBacktrack.equals("true"); 2585 } 2586 optionIsValid(String key, Object value)2587 public boolean optionIsValid(String key, Object value) { 2588 return true; 2589 } 2590 buildAST()2591 public boolean buildAST() { 2592 String outputType = (String)getOption("output"); 2593 if ( outputType!=null ) { 2594 return outputType.toString().equals("AST"); 2595 } 2596 return false; 2597 } 2598 rewriteMode()2599 public boolean rewriteMode() { 2600 Object outputType = getOption("rewrite"); 2601 if ( outputType!=null ) { 2602 return outputType.toString().equals("true"); 2603 } 2604 return false; 2605 } 2606 isBuiltFromString()2607 public boolean isBuiltFromString() { 2608 return builtFromString; 2609 } 2610 buildTemplate()2611 public boolean buildTemplate() { 2612 String outputType = (String)getOption("output"); 2613 if ( outputType!=null ) { 2614 return outputType.toString().equals("template"); 2615 } 2616 return false; 2617 } 2618 getRules()2619 public Collection<Rule> getRules() { 2620 return nameToRuleMap.values(); 2621 } 2622 2623 /** Get the set of Rules that need to have manual delegations 2624 * like "void rule() { importedGrammar.rule(); }" 2625 * 2626 * If this grammar is master, get list of all rule definitions from all 2627 * delegate grammars. Only master has complete interface from combined 2628 * grammars...we will generated delegates as helper objects. 2629 * 2630 * Composite grammars that are not the root/master do not have complete 2631 * interfaces. It is not my intention that people use subcomposites. 2632 * Only the outermost grammar should be used from outside code. The 2633 * other grammar components are specifically generated to work only 2634 * with the master/root. 2635 * 2636 * delegatedRules = imported - overridden 2637 */ getDelegatedRules()2638 public Set<Rule> getDelegatedRules() { 2639 return composite.getDelegatedRules(this); 2640 } 2641 2642 /** Get set of all rules imported from all delegate grammars even if 2643 * indirectly delegated. 2644 */ getAllImportedRules()2645 public Set<Rule> getAllImportedRules() { 2646 return composite.getAllImportedRules(this); 2647 } 2648 2649 /** Get list of all delegates from all grammars directly or indirectly 2650 * imported into this grammar. 2651 */ getDelegates()2652 public List<Grammar> getDelegates() { 2653 return composite.getDelegates(this); 2654 } 2655 getHasDelegates()2656 public boolean getHasDelegates() { 2657 return !getDelegates().isEmpty(); 2658 } 2659 getDelegateNames()2660 public List<String> getDelegateNames() { 2661 // compute delegates:{Grammar g | return g.name;} 2662 List<String> names = new ArrayList<String>(); 2663 List<Grammar> delegates = composite.getDelegates(this); 2664 if ( delegates!=null ) { 2665 for (Grammar g : delegates) { 2666 names.add(g.name); 2667 } 2668 } 2669 return names; 2670 } 2671 getDirectDelegates()2672 public List<Grammar> getDirectDelegates() { 2673 return composite.getDirectDelegates(this); 2674 } 2675 2676 /** Get delegates below direct delegates */ getIndirectDelegates()2677 public List<Grammar> getIndirectDelegates() { 2678 return composite.getIndirectDelegates(this); 2679 } 2680 2681 /** Get list of all delegators. This amounts to the grammars on the path 2682 * to the root of the delegation tree. 2683 */ getDelegators()2684 public List<Grammar> getDelegators() { 2685 return composite.getDelegators(this); 2686 } 2687 2688 /** Who's my direct parent grammar? */ getDelegator()2689 public Grammar getDelegator() { 2690 return composite.getDelegator(this); 2691 } 2692 getDelegatedRuleReferences()2693 public Set<Rule> getDelegatedRuleReferences() { 2694 return delegatedRuleReferences; 2695 } 2696 getGrammarIsRoot()2697 public boolean getGrammarIsRoot() { 2698 return composite.delegateGrammarTreeRoot.grammar == this; 2699 } 2700 setRuleAST(String ruleName, GrammarAST t)2701 public void setRuleAST(String ruleName, GrammarAST t) { 2702 Rule r = getLocallyDefinedRule(ruleName); 2703 if ( r!=null ) { 2704 r.tree = t; 2705 r.EORNode = t.getLastChild(); 2706 } 2707 } 2708 getRuleStartState(String ruleName)2709 public NFAState getRuleStartState(String ruleName) { 2710 return getRuleStartState(null, ruleName); 2711 } 2712 getRuleStartState(String scopeName, String ruleName)2713 public NFAState getRuleStartState(String scopeName, String ruleName) { 2714 Rule r = getRule(scopeName, ruleName); 2715 if ( r!=null ) { 2716 //System.out.println("getRuleStartState("+scopeName+", "+ruleName+")="+r.startState); 2717 return r.startState; 2718 } 2719 //System.out.println("getRuleStartState("+scopeName+", "+ruleName+")=null"); 2720 return null; 2721 } 2722 getRuleModifier(String ruleName)2723 public String getRuleModifier(String ruleName) { 2724 Rule r = getRule(ruleName); 2725 if ( r!=null ) { 2726 return r.modifier; 2727 } 2728 return null; 2729 } 2730 getRuleStopState(String ruleName)2731 public NFAState getRuleStopState(String ruleName) { 2732 Rule r = getRule(ruleName); 2733 if ( r!=null ) { 2734 return r.stopState; 2735 } 2736 return null; 2737 } 2738 assignDecisionNumber(NFAState state)2739 public int assignDecisionNumber(NFAState state) { 2740 decisionCount++; 2741 state.setDecisionNumber(decisionCount); 2742 return decisionCount; 2743 } 2744 getDecision(int decision)2745 protected Decision getDecision(int decision) { 2746 int index = decision-1; 2747 if ( index >= indexToDecision.size() ) { 2748 return null; 2749 } 2750 Decision d = indexToDecision.get(index); 2751 return d; 2752 } 2753 getDecisions()2754 public List<Decision> getDecisions() { 2755 return indexToDecision; 2756 } 2757 createDecision(int decision)2758 protected Decision createDecision(int decision) { 2759 int index = decision-1; 2760 if ( index < indexToDecision.size() ) { 2761 return getDecision(decision); // don't recreate 2762 } 2763 Decision d = new Decision(); 2764 d.decision = decision; 2765 d.grammar = this; 2766 indexToDecision.setSize(getNumberOfDecisions()); 2767 indexToDecision.set(index, d); 2768 return d; 2769 } 2770 getDecisionNFAStartStateList()2771 public List getDecisionNFAStartStateList() { 2772 List states = new ArrayList(100); 2773 for (int d = 0; d < indexToDecision.size(); d++) { 2774 Decision dec = (Decision) indexToDecision.get(d); 2775 states.add(dec.startState); 2776 } 2777 return states; 2778 } 2779 getDecisionNFAStartState(int decision)2780 public NFAState getDecisionNFAStartState(int decision) { 2781 Decision d = getDecision(decision); 2782 if ( d==null ) { 2783 return null; 2784 } 2785 return d.startState; 2786 } 2787 getLookaheadDFA(int decision)2788 public DFA getLookaheadDFA(int decision) { 2789 Decision d = getDecision(decision); 2790 if ( d==null ) { 2791 return null; 2792 } 2793 return d.dfa; 2794 } 2795 getDecisionBlockAST(int decision)2796 public GrammarAST getDecisionBlockAST(int decision) { 2797 Decision d = getDecision(decision); 2798 if ( d==null ) { 2799 return null; 2800 } 2801 return d.blockAST; 2802 } 2803 2804 /** returns a list of column numbers for all decisions 2805 * on a particular line so ANTLRWorks choose the decision 2806 * depending on the location of the cursor (otherwise, 2807 * ANTLRWorks has to give the *exact* location which 2808 * is not easy from the user point of view). 2809 * 2810 * This is not particularly fast as it walks entire line:col->DFA map 2811 * looking for a prefix of "line:". 2812 */ getLookaheadDFAColumnsForLineInFile(int line)2813 public List getLookaheadDFAColumnsForLineInFile(int line) { 2814 String prefix = line+":"; 2815 List columns = new ArrayList(); 2816 for(Iterator iter = lineColumnToLookaheadDFAMap.keySet().iterator(); 2817 iter.hasNext(); ) { 2818 String key = (String)iter.next(); 2819 if(key.startsWith(prefix)) { 2820 columns.add(Integer.valueOf(key.substring(prefix.length()))); 2821 } 2822 } 2823 return columns; 2824 } 2825 2826 /** Useful for ANTLRWorks to map position in file to the DFA for display */ getLookaheadDFAFromPositionInFile(int line, int col)2827 public DFA getLookaheadDFAFromPositionInFile(int line, int col) { 2828 return (DFA)lineColumnToLookaheadDFAMap.get( 2829 new StringBuffer().append(line + ":").append(col).toString()); 2830 } 2831 getLineColumnToLookaheadDFAMap()2832 public Map getLineColumnToLookaheadDFAMap() { 2833 return lineColumnToLookaheadDFAMap; 2834 } 2835 2836 /* 2837 public void setDecisionOptions(int decision, Map options) { 2838 Decision d = createDecision(decision); 2839 d.options = options; 2840 } 2841 2842 public void setDecisionOption(int decision, String name, Object value) { 2843 Decision d = getDecision(decision); 2844 if ( d!=null ) { 2845 if ( d.options==null ) { 2846 d.options = new HashMap(); 2847 } 2848 d.options.put(name,value); 2849 } 2850 } 2851 2852 public Map getDecisionOptions(int decision) { 2853 Decision d = getDecision(decision); 2854 if ( d==null ) { 2855 return null; 2856 } 2857 return d.options; 2858 } 2859 */ 2860 getNumberOfDecisions()2861 public int getNumberOfDecisions() { 2862 return decisionCount; 2863 } 2864 getNumberOfCyclicDecisions()2865 public int getNumberOfCyclicDecisions() { 2866 int n = 0; 2867 for (int i=1; i<=getNumberOfDecisions(); i++) { 2868 Decision d = getDecision(i); 2869 if ( d.dfa!=null && d.dfa.isCyclic() ) { 2870 n++; 2871 } 2872 } 2873 return n; 2874 } 2875 2876 /** Set the lookahead DFA for a particular decision. This means 2877 * that the appropriate AST node must updated to have the new lookahead 2878 * DFA. This method could be used to properly set the DFAs without 2879 * using the createLookaheadDFAs() method. You could do this 2880 * 2881 * Grammar g = new Grammar("..."); 2882 * g.setLookahead(1, dfa1); 2883 * g.setLookahead(2, dfa2); 2884 * ... 2885 */ setLookaheadDFA(int decision, DFA lookaheadDFA)2886 public void setLookaheadDFA(int decision, DFA lookaheadDFA) { 2887 Decision d = createDecision(decision); 2888 d.dfa = lookaheadDFA; 2889 GrammarAST ast = d.startState.associatedASTNode; 2890 ast.setLookaheadDFA(lookaheadDFA); 2891 } 2892 setDecisionNFA(int decision, NFAState state)2893 public void setDecisionNFA(int decision, NFAState state) { 2894 Decision d = createDecision(decision); 2895 d.startState = state; 2896 } 2897 setDecisionBlockAST(int decision, GrammarAST blockAST)2898 public void setDecisionBlockAST(int decision, GrammarAST blockAST) { 2899 //System.out.println("setDecisionBlockAST("+decision+", "+blockAST.token); 2900 Decision d = createDecision(decision); 2901 d.blockAST = blockAST; 2902 } 2903 allDecisionDFAHaveBeenCreated()2904 public boolean allDecisionDFAHaveBeenCreated() { 2905 return allDecisionDFACreated; 2906 } 2907 2908 /** How many token types have been allocated so far? */ getMaxTokenType()2909 public int getMaxTokenType() { 2910 return composite.maxTokenType; 2911 } 2912 2913 /** What is the max char value possible for this grammar's target? Use 2914 * unicode max if no target defined. 2915 */ getMaxCharValue()2916 public int getMaxCharValue() { 2917 if ( generator!=null ) { 2918 return generator.target.getMaxCharValue(generator); 2919 } 2920 else { 2921 return Label.MAX_CHAR_VALUE; 2922 } 2923 } 2924 2925 /** Return a set of all possible token or char types for this grammar */ getTokenTypes()2926 public IntSet getTokenTypes() { 2927 if ( type==LEXER ) { 2928 return getAllCharValues(); 2929 } 2930 return IntervalSet.of(Label.MIN_TOKEN_TYPE, getMaxTokenType()); 2931 } 2932 2933 /** If there is a char vocabulary, use it; else return min to max char 2934 * as defined by the target. If no target, use max unicode char value. 2935 */ getAllCharValues()2936 public IntSet getAllCharValues() { 2937 if ( charVocabulary!=null ) { 2938 return charVocabulary; 2939 } 2940 IntSet allChar = IntervalSet.of(Label.MIN_CHAR_VALUE, getMaxCharValue()); 2941 return allChar; 2942 } 2943 2944 /** Return a string representing the escaped char for code c. E.g., If c 2945 * has value 0x100, you will get "\u0100". ASCII gets the usual 2946 * char (non-hex) representation. Control characters are spit out 2947 * as unicode. While this is specially set up for returning Java strings, 2948 * it can be used by any language target that has the same syntax. :) 2949 * 2950 * 11/26/2005: I changed this to use double quotes, consistent with antlr.g 2951 * 12/09/2005: I changed so everything is single quotes 2952 */ getANTLRCharLiteralForChar(int c)2953 public static String getANTLRCharLiteralForChar(int c) { 2954 if ( c<Label.MIN_CHAR_VALUE ) { 2955 ErrorManager.internalError("invalid char value "+c); 2956 return "'<INVALID>'"; 2957 } 2958 if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) { 2959 return '\''+ANTLRLiteralCharValueEscape[c]+'\''; 2960 } 2961 if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN && 2962 !Character.isISOControl((char)c) ) { 2963 if ( c=='\\' ) { 2964 return "'\\\\'"; 2965 } 2966 if ( c=='\'') { 2967 return "'\\''"; 2968 } 2969 return '\''+Character.toString((char)c)+'\''; 2970 } 2971 // turn on the bit above max "\uFFFF" value so that we pad with zeros 2972 // then only take last 4 digits 2973 String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5); 2974 String unicodeStr = "'\\u"+hex+"'"; 2975 return unicodeStr; 2976 } 2977 2978 /** For lexer grammars, return everything in unicode not in set. 2979 * For parser and tree grammars, return everything in token space 2980 * from MIN_TOKEN_TYPE to last valid token type or char value. 2981 */ complement(IntSet set)2982 public IntSet complement(IntSet set) { 2983 //System.out.println("complement "+set.toString(this)); 2984 //System.out.println("vocabulary "+getTokenTypes().toString(this)); 2985 IntSet c = set.complement(getTokenTypes()); 2986 //System.out.println("result="+c.toString(this)); 2987 return c; 2988 } 2989 complement(int atom)2990 public IntSet complement(int atom) { 2991 return complement(IntervalSet.of(atom)); 2992 } 2993 2994 /** Given set tree like ( SET A B ), check that A and B 2995 * are both valid sets themselves, else we must tree like a BLOCK 2996 */ isValidSet(TreeToNFAConverter nfabuilder, GrammarAST t)2997 public boolean isValidSet(TreeToNFAConverter nfabuilder, GrammarAST t) { 2998 boolean valid = true; 2999 try { 3000 //System.out.println("parse BLOCK as set tree: "+t.toStringTree()); 3001 int alts = nfabuilder.testBlockAsSet(t); 3002 valid = alts > 1; 3003 } 3004 catch (RecognitionException re) { 3005 // The rule did not parse as a set, return null; ignore exception 3006 valid = false; 3007 } 3008 //System.out.println("valid? "+valid); 3009 return valid; 3010 } 3011 3012 /** Get the set equivalent (if any) of the indicated rule from this 3013 * grammar. Mostly used in the lexer to do ~T for some fragment rule 3014 * T. If the rule AST has a SET use that. If the rule is a single char 3015 * convert it to a set and return. If rule is not a simple set (w/o actions) 3016 * then return null. 3017 * Rules have AST form: 3018 * 3019 * ^( RULE ID modifier ARG RET SCOPE block EOR ) 3020 */ getSetFromRule(TreeToNFAConverter nfabuilder, String ruleName)3021 public IntSet getSetFromRule(TreeToNFAConverter nfabuilder, String ruleName) 3022 throws RecognitionException 3023 { 3024 Rule r = getRule(ruleName); 3025 if ( r==null ) { 3026 return null; 3027 } 3028 IntSet elements = null; 3029 //System.out.println("parsed tree: "+r.tree.toStringTree()); 3030 elements = nfabuilder.setRule(r.tree); 3031 //System.out.println("elements="+elements); 3032 return elements; 3033 } 3034 3035 /** Decisions are linked together with transition(1). Count how 3036 * many there are. This is here rather than in NFAState because 3037 * a grammar decides how NFAs are put together to form a decision. 3038 */ getNumberOfAltsForDecisionNFA(NFAState decisionState)3039 public int getNumberOfAltsForDecisionNFA(NFAState decisionState) { 3040 if ( decisionState==null ) { 3041 return 0; 3042 } 3043 int n = 1; 3044 NFAState p = decisionState; 3045 while ( p.transition[1] !=null ) { 3046 n++; 3047 p = (NFAState)p.transition[1].target; 3048 } 3049 return n; 3050 } 3051 3052 /** Get the ith alternative (1..n) from a decision; return null when 3053 * an invalid alt is requested. I must count in to find the right 3054 * alternative number. For (A|B), you get NFA structure (roughly): 3055 * 3056 * o->o-A->o 3057 * | 3058 * o->o-B->o 3059 * 3060 * This routine returns the leftmost state for each alt. So alt=1, returns 3061 * the upperleft most state in this structure. 3062 */ getNFAStateForAltOfDecision(NFAState decisionState, int alt)3063 public NFAState getNFAStateForAltOfDecision(NFAState decisionState, int alt) { 3064 if ( decisionState==null || alt<=0 ) { 3065 return null; 3066 } 3067 int n = 1; 3068 NFAState p = decisionState; 3069 while ( p!=null ) { 3070 if ( n==alt ) { 3071 return p; 3072 } 3073 n++; 3074 Transition next = p.transition[1]; 3075 p = null; 3076 if ( next!=null ) { 3077 p = (NFAState)next.target; 3078 } 3079 } 3080 return null; 3081 } 3082 3083 /* 3084 public void computeRuleFOLLOWSets() { 3085 if ( getNumberOfDecisions()==0 ) { 3086 createNFAs(); 3087 } 3088 for (Iterator it = getRules().iterator(); it.hasNext();) { 3089 Rule r = (Rule)it.next(); 3090 if ( r.isSynPred ) { 3091 continue; 3092 } 3093 LookaheadSet s = ll1Analyzer.FOLLOW(r); 3094 System.out.println("FOLLOW("+r.name+")="+s); 3095 } 3096 } 3097 */ 3098 FIRST(NFAState s)3099 public LookaheadSet FIRST(NFAState s) { 3100 return ll1Analyzer.FIRST(s); 3101 } 3102 LOOK(NFAState s)3103 public LookaheadSet LOOK(NFAState s) { 3104 return ll1Analyzer.LOOK(s); 3105 } 3106 setCodeGenerator(CodeGenerator generator)3107 public void setCodeGenerator(CodeGenerator generator) { 3108 this.generator = generator; 3109 } 3110 getCodeGenerator()3111 public CodeGenerator getCodeGenerator() { 3112 return generator; 3113 } 3114 getGrammarTree()3115 public GrammarAST getGrammarTree() { 3116 return grammarTree; 3117 } 3118 setGrammarTree(GrammarAST value)3119 public void setGrammarTree(GrammarAST value) { 3120 grammarTree = value; 3121 } 3122 getTool()3123 public Tool getTool() { 3124 return tool; 3125 } 3126 setTool(Tool tool)3127 public void setTool(Tool tool) { 3128 this.tool = tool; 3129 } 3130 3131 /** given a token type and the text of the literal, come up with a 3132 * decent token type label. For now it's just T<type>. Actually, 3133 * if there is an aliased name from tokens like PLUS='+', use it. 3134 */ computeTokenNameFromLiteral(int tokenType, String literal)3135 public String computeTokenNameFromLiteral(int tokenType, String literal) { 3136 return AUTO_GENERATED_TOKEN_NAME_PREFIX +tokenType; 3137 } 3138 toString()3139 public String toString() { 3140 // return "FFFFFFFFFFFFFF"; 3141 return grammarTreeToString(grammarTree); 3142 } 3143 grammarTreeToString(GrammarAST t)3144 public String grammarTreeToString(GrammarAST t) { 3145 return grammarTreeToString(t, true); 3146 } 3147 grammarTreeToString(GrammarAST t, boolean showActions)3148 public String grammarTreeToString(GrammarAST t, boolean showActions) { 3149 String s = null; 3150 try { 3151 s = t.getLine()+":"+(t.getCharPositionInLine()+1)+": "; 3152 s += new ANTLRTreePrinter(new CommonTreeNodeStream(t)).toString(this, showActions); 3153 } 3154 catch (Exception e) { 3155 s = "<invalid or missing tree structure>"; 3156 } 3157 return s; 3158 } 3159 printGrammar(PrintStream output)3160 public void printGrammar(PrintStream output) { 3161 ANTLRTreePrinter printer = new ANTLRTreePrinter(new CommonTreeNodeStream(getGrammarTree())); 3162 try { 3163 String g = printer.toString(this, false); 3164 output.println(g); 3165 } 3166 catch (RecognitionException re) { 3167 ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,re); 3168 } 3169 } 3170 3171 } 3172