/*
 * [The "BSD license"]
 *  Copyright (c) 2010 Terence Parr
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *  1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.antlr.tool;

import org.antlr.Tool;
import org.antlr.analysis.*;
import org.antlr.analysis.DFA;
import org.antlr.codegen.CodeGenerator;
import org.antlr.codegen.*;
import org.antlr.grammar.v3.*;
import org.antlr.misc.*;
import org.antlr.misc.Utils;
import org.antlr.runtime.*;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.stringtemplate.v4.ST;
import org.stringtemplate.v4.STGroup;
import org.stringtemplate.v4.STGroupString;

import java.io.*;
import java.util.*;

/** Represents a grammar in memory. */
public class Grammar {
	public static final String SYNPRED_RULE_PREFIX = "synpred";

	public static final String GRAMMAR_FILE_EXTENSION = ".g";

	/** used for generating lexer temp files */
	public static final String LEXER_GRAMMAR_FILE_EXTENSION = ".g";

	public static final int INITIAL_DECISION_LIST_SIZE = 300;
	public static final int INVALID_RULE_INDEX = -1;

	// the various kinds of labels. t=type, id=ID, types+=type, ids+=ID
	public static final int RULE_LABEL = 1;
	public static final int TOKEN_LABEL = 2;
	public static final int RULE_LIST_LABEL = 3;
	public static final int TOKEN_LIST_LABEL = 4;
	public static final int CHAR_LABEL = 5; // used in lexer for x='a'
	public static final int WILDCARD_TREE_LABEL = 6; // used in tree grammar x=.
	public static final int WILDCARD_TREE_LIST_LABEL = 7; // used in tree grammar x+=.

	public static String[] LabelTypeToString = // indexed by the label type constants above
		{"<invalid>", "rule", "token", "rule-list", "token-list", "char", "wildcard-tree", "wildcard-tree-list"};

	public static final String ARTIFICIAL_TOKENS_RULENAME = "Tokens";
	public static final String FRAGMENT_RULE_MODIFIER = "fragment";

	public static final String SYNPREDGATE_ACTION_NAME = "synpredgate";

	/** When converting ANTLR char and string literals, this table maps
	 *  each escape char to its char value.
	 */
	public static int ANTLRLiteralEscapedCharValue[] = new int[255];

	/** Given a char, we need to be able to show it as an ANTLR literal.
	 */
	public static String ANTLRLiteralCharValueEscape[] = new String[255];

	static {
		ANTLRLiteralEscapedCharValue['n'] = '\n';
		ANTLRLiteralEscapedCharValue['r'] = '\r';
		ANTLRLiteralEscapedCharValue['t'] = '\t';
		ANTLRLiteralEscapedCharValue['b'] = '\b';
		ANTLRLiteralEscapedCharValue['f'] = '\f';
		ANTLRLiteralEscapedCharValue['\\'] = '\\';
		ANTLRLiteralEscapedCharValue['\''] = '\'';
		ANTLRLiteralEscapedCharValue['"'] = '"';
		ANTLRLiteralCharValueEscape['\n'] = "\\n";
		ANTLRLiteralCharValueEscape['\r'] = "\\r";
		ANTLRLiteralCharValueEscape['\t'] = "\\t";
		ANTLRLiteralCharValueEscape['\b'] = "\\b";
		ANTLRLiteralCharValueEscape['\f'] = "\\f";
		ANTLRLiteralCharValueEscape['\\'] = "\\\\";
		ANTLRLiteralCharValueEscape['\''] = "\\'";
	}
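
	/* Illustrative example: the two tables above are inverses of each
	 * other, so unescaping and escaping round-trip:
	 *
	 *   int v = ANTLRLiteralEscapedCharValue['n'];    // == '\n'
	 *   String s = ANTLRLiteralCharValueEscape['\n']; // == "\\n"
	 */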

	public static final int LEXER = 1;
	public static final int PARSER = 2;
	public static final int TREE_PARSER = 3;
	public static final int COMBINED = 4;
	public static final String[] grammarTypeToString = new String[] {
		"<invalid>",
		"lexer",
		"parser",
		"tree",
		"combined"
	};

	public static final String[] grammarTypeToFileNameSuffix = new String[] {
		"<invalid>",
		"Lexer",
		"Parser",
		"", // no suffix for tree grammars
		"Parser" // if combined grammar, gen Parser and Lexer will be done later
	};

	/** Set of valid imports.  E.g., can only import a tree parser into
	 *  another tree parser.  Maps delegate to set of delegator grammar types.
	 *  validDelegations.get(LEXER) gives list of the kinds of delegators
	 *  that can import lexers.
	 */
	public static MultiMap<Integer,Integer> validDelegations =
		new MultiMap<Integer,Integer>() {
			{
				map(LEXER, LEXER);
				map(LEXER, PARSER);
				map(LEXER, COMBINED);

				map(PARSER, PARSER);
				map(PARSER, COMBINED);

				map(TREE_PARSER, TREE_PARSER);

				// TODO: allow COMBINED
				// map(COMBINED, COMBINED);
			}
		};
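
	/* Illustrative example: validImport() below consults this table from
	 * the delegate's side.  A combined grammar may import a lexer or a
	 * parser, but a tree parser may only import another tree parser:
	 *
	 *   validDelegations.get(LEXER);       // [LEXER, PARSER, COMBINED]
	 *   validDelegations.get(TREE_PARSER); // [TREE_PARSER]
	 */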

	/** This is the buffer of *all* tokens found in the grammar file,
	 *  including whitespace tokens etc.  I use this to extract
	 *  lexer rules from combined grammars.
	 */
	public CommonTokenStream tokenBuffer;
	public static final String IGNORE_STRING_IN_GRAMMAR_FILE_NAME = "__";
	public static final String AUTO_GENERATED_TOKEN_NAME_PREFIX = "T__";

	public static class Decision {
		public Grammar grammar;
		public int decision;
		public NFAState startState;
		public GrammarAST blockAST;
		public DFA dfa;
	}

	public class LabelElementPair {
		public Token label;
		public GrammarAST elementRef;
		public String referencedRuleName;
		/** Has an action referenced the label?  Set by ActionAnalysis.g.
		 *  Currently only set for rule labels.
		 */
		public boolean actionReferencesLabel;
		public int type; // one of the label type constants above, RULE_LABEL..WILDCARD_TREE_LIST_LABEL
		public LabelElementPair(Token label, GrammarAST elementRef) {
			this.label = label;
			this.elementRef = elementRef;
			this.referencedRuleName = elementRef.getText();
		}
		public Rule getReferencedRule() {
			return getRule(referencedRuleName);
		}
		public String toString() {
			return elementRef.toString();
		}
	}

	/** What name did the user provide for this grammar? */
	public String name;

	/** What type of grammar is this: lexer, parser, tree walker */
	public int type;

	/** A list of options specified at the grammar level such as language=Java.
	 *  The value can be an AST for complicated values such as character sets.
	 *  There may be code generator specific options in here.  I do no
	 *  interpretation of the key/value pairs...they are simply available to
	 *  whoever wants them.
	 */
	protected Map options;

	public static final Set legalLexerOptions =
			new HashSet() {
				{
				add("language"); add("tokenVocab");
				add("TokenLabelType");
				add("superClass");
				add("filter");
				add("k");
				add("backtrack");
				add("memoize");
				}
			};

	public static final Set legalParserOptions =
			new HashSet() {
				{
				add("language"); add("tokenVocab");
				add("output"); add("rewrite"); add("ASTLabelType");
				add("TokenLabelType");
				add("superClass");
				add("k");
				add("backtrack");
				add("memoize");
				}
			};

	public static final Set legalTreeParserOptions =
		new HashSet() {
			{
				add("language"); add("tokenVocab");
				add("output"); add("rewrite"); add("ASTLabelType");
				add("TokenLabelType");
				add("superClass");
				add("k");
				add("backtrack");
				add("memoize");
				add("filter");
			}
		};

	public static final Set doNotCopyOptionsToLexer =
		new HashSet() {
			{
				add("output"); add("ASTLabelType"); add("superClass");
				add("k"); add("backtrack"); add("memoize"); add("rewrite");
			}
		};

	public static final Map defaultOptions =
			new HashMap() {
				{
					put("language","Java");
				}
			};

	public static final Set legalBlockOptions =
			new HashSet() {{add("k"); add("greedy"); add("backtrack"); add("memoize");}};

	/** What are the default options for a subrule? */
	public static final Map defaultBlockOptions =
			new HashMap() {{put("greedy","true");}};

	public static final Map defaultLexerBlockOptions =
			new HashMap() {{put("greedy","true");}};

	// Token options live here to avoid contaminating the Token object in the runtime

	/** Legal options for terminal refs like ID<node=MyVarNode> */
	public static final Set legalTokenOptions =
		new HashSet() {
			{
				add(defaultTokenOption);
				add("type");
				add("text");
				add("assoc");
			}
		};

	public static final String defaultTokenOption = "node";

	/** Is there a global fixed lookahead set for this grammar?
	 *  If 0, nothing specified.  -1 implies we have not looked at
	 *  the options table yet to set k.
	 */
	protected int global_k = -1;

	/** Map a scope to a map of name:action pairs.
	 *  Map<String, Map<String,GrammarAST>>
	 *  The code generator will use this to fill holes in the output files.
	 *  I track the AST node for the action in case I need the line number
	 *  for errors.
	 */
	private Map<String, Map<String, Object>> actions =
		new HashMap<String, Map<String, Object>>();

	/** The NFA that represents the grammar with edges labelled with tokens
	 *  or epsilon.  It is more suitable for analysis than an AST representation.
	 */
	public NFA nfa;

	protected NFAFactory factory;

	/** If this grammar is part of a larger composite grammar via delegate
	 *  statement, then this points at the composite.  The composite holds
	 *  a global list of rules, token types, decision numbers, etc...
	 */
	public CompositeGrammar composite;

	/** A pointer back into grammar tree.  Needed so we can add delegates. */
	public CompositeGrammarTree compositeTreeNode;

	/** If this is a delegate of another grammar, this is the label used
	 *  as an instance var by that grammar to point at this grammar. null
	 *  if no label was specified in the delegate statement.
	 */
	public String label;

	/** TODO: hook this to the charVocabulary option */
	protected IntSet charVocabulary = null;

	/** For ANTLRWorks, we want to be able to map a line:col to a specific
	 *  decision DFA so it can display the DFA.
	 */
	Map lineColumnToLookaheadDFAMap = new HashMap();

	public Tool tool;

	/** The unique set of all rule references in any rule; set of tree node
	 *  objects so two refs to the same rule can exist at different lines/positions.
	 */
	protected Set<GrammarAST> ruleRefs = new HashSet<GrammarAST>();

	protected Set<GrammarAST> scopedRuleRefs = new HashSet<GrammarAST>();

	/** The unique set of all token ID references in any rule */
	protected Set<Token> tokenIDRefs = new HashSet<Token>();

	/** Be able to assign a number to every decision in the grammar;
	 *  decisions in 1..n
	 */
	protected int decisionCount = 0;

	/** A list of all rules that are in any left-recursive cycle.  There
	 *  could be multiple cycles, but this is a flat list of all problematic
	 *  rules.  These are the rules we could not refactor into precedence rules.
	 */
	protected Set<Rule> leftRecursiveRules;

	/** Set when an external tool requests that DFA analysis abort prematurely.
	 *  Aborts happen at DFA granularity; DFA construction is also limited
	 *  by size and computation time as a failsafe.
	 */
	protected boolean externalAnalysisAbort;

	public int numNonLLStar = 0; // hack to track for -report

	/** When we read in a grammar, we track the list of syntactic predicates
	 *  and build faux rules for them later.  See my blog entry Dec 2, 2005:
	 *  http://www.antlr.org/blog/antlr3/lookahead.tml
	 *  This maps the name (we make up) for a pred to the AST grammar fragment.
	 */
	protected LinkedHashMap<String, GrammarAST> nameToSynpredASTMap;

	/** Each left-recursive precedence rule must define a precedence array
	 *  for binary operators like:
	 *
	 *  	static int[] e_prec = new int[tokenNames.length];
	 *  	static {
	 *  		e_prec[75] = 1;
	 *  	}
	 *
	 *  We track these and push them into the parser later; this is computed
	 *  early, when we look for prec rules.
	 */
	public List<String> precRuleInitCodeBlocks = new ArrayList<String>();

	/** At least one rule has memoize=true */
	public boolean atLeastOneRuleMemoizes;

	/** At least one backtrack=true in rule or decision or grammar. */
	public boolean atLeastOneBacktrackOption;

	/** Was this created from a COMBINED grammar? */
	public boolean implicitLexer;

	/** Map a rule name to its Rule object */
	protected LinkedHashMap<String,Rule> nameToRuleMap = new LinkedHashMap<String,Rule>();

	/** If this grammar is a delegate, some rules might be overridden; we don't
	 *  want to gen code for them.
	 */
	public Set<String> overriddenRules = new HashSet<String>();

	/** The list of all rules referenced in this grammar, not defined here,
	 *  and defined in a delegate grammar.  Not all of these will be generated
	 *  in the recognizer for this file; only those that are affected by rule
	 *  definitions in this grammar.  I am not sure the Java target will need
	 *  this but I'm leaving it in case other targets need it.
	 *  See NameSpaceChecker.lookForReferencesToUndefinedSymbols().
	 */
	protected Set<Rule> delegatedRuleReferences = new HashSet<Rule>();

	/** The ANTLRParser tracks lexer rules when reading combined grammars
	 *  so we can build the Tokens rule.
	 */
	public List<String> lexerRuleNamesInCombined = new ArrayList<String>();

	/** Track the scopes defined outside of rules and the scopes associated
	 *  with all rules (even if empty).
	 */
	protected Map scopes = new HashMap();

	/** An AST that records the entire input grammar with all rules.  A simple
	 *  grammar with one rule, "grammar t; a : A | B ;", looks like:
	 *  ( grammar t ( rule a ( BLOCK ( ALT A ) ( ALT B ) ) <end-of-rule> ) )
	 */
	protected GrammarAST grammarTree = null;

	/** Each subrule/rule is a decision point and we must track them so we
	 *  can go back later and build DFA predictors for them.  This includes
	 *  all the rules, subrules, optional blocks, ()+, ()*, etc...
	 */
	protected Vector<Decision> indexToDecision =
		new Vector<Decision>(INITIAL_DECISION_LIST_SIZE);

	/** If non-null, this is the code generator we will use to generate
	 *  recognizers in the target language.
	 */
	protected CodeGenerator generator;

	public NameSpaceChecker nameSpaceChecker = new NameSpaceChecker(this);

	public LL1Analyzer ll1Analyzer = new LL1Analyzer(this);

	/** For merged lexer/parsers, we must construct a separate lexer spec.
	 *  This is the template for the lexer; we put the literals first, then
	 *  the regular rules.  We don't need to specify a token vocab import,
	 *  as I make the new grammar import from the old one all in memory; we
	 *  don't want to force it to read from disk.  The lexer grammar will
	 *  have the same name as the original grammar but will live in a
	 *  different file.  Foo.g with a combined grammar generates
	 *  FooParser.java plus Foo__.g, again with Foo inside.  The latter
	 *  will, however, generate FooLexer.java since it is a lexer grammar.
	 *  A bit odd, but it's autogenerated.  We can tweak this later if we
	 *  want.
	 */
	protected String lexerGrammarTemplate =
			"grammar(name, options, imports, actionNames, actions, literals, rules) ::= <<\n" +
			"lexer grammar <name>;\n" +
			"<if(options)>" +
			"options {\n" +
			"  <options:{it | <it.name>=<it.value>;<\\n>}>\n" +
			"}<\\n>\n" +
			"<endif>\n" +
			"<if(imports)>import <imports; separator=\", \">;<endif>\n" +
			"<actionNames,actions:{n,a|@<n> {<a>\\}\n}>\n" +
			"<literals:{it | <it.ruleName> : <it.literal> ;\n}>\n" +
			"<rules>\n" +
			">>\n";
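
	/* Illustrative example (hypothetical token type number): for a
	 * combined grammar "grammar Foo;" containing the literal ';' and
	 * lexer rule ID, getLexerGrammar() renders the template above to
	 * something like:
	 *
	 *   lexer grammar Foo;
	 *   T__10 : ';' ;
	 *   ID : 'a'..'z'+ ;
	 */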
	protected ST lexerGrammarST;

	/** What file name holds this grammar? */
	protected String fileName;

	/** How long in ms did it take to build DFAs for this grammar?
	 *  If this grammar is a combined grammar, it only records time for
	 *  the parser grammar component.  This only records the time to
	 *  do the LL(*) work; NFA->DFA conversion.
	 */
	public long DFACreationWallClockTimeInMS;

	public int numberOfSemanticPredicates = 0;
	public int numberOfManualLookaheadOptions = 0;
	public Set<Integer> setOfNondeterministicDecisionNumbers = new HashSet<Integer>();
	public Set<Integer> setOfNondeterministicDecisionNumbersResolvedWithPredicates =
		new HashSet<Integer>();

	/** Track decisions with syn preds specified for reporting.
	 *  This is a set of BLOCK-type AST nodes.
	 */
	public Set<GrammarAST> blocksWithSynPreds = new HashSet<GrammarAST>();

	/** Track decisions that actually use the syn preds in the DFA.
	 *  Computed during NFA to DFA conversion.
	 */
	public Set<DFA> decisionsWhoseDFAsUsesSynPreds = new HashSet<DFA>();

	/** Track names of preds so we can avoid generating preds that aren't used.
	 *  Computed during NFA to DFA conversion.  Just walk accept states
	 *  and look for synpreds because that is the only state target whose
	 *  incident edges can have synpreds.  Same is true for
	 *  decisionsWhoseDFAsUsesSynPreds.
	 */
	public Set<String> synPredNamesUsedInDFA = new HashSet<String>();

	/** Track decisions with sem preds specified for reporting.
	 *  This is a set of BLOCK-type AST nodes.
	 */
	public Set<GrammarAST> blocksWithSemPreds = new HashSet<GrammarAST>();

	/** Track decisions that actually use the sem preds in the DFA. */
	public Set<DFA> decisionsWhoseDFAsUsesSemPreds = new HashSet<DFA>();

	protected boolean allDecisionDFACreated = false;

	/** We need a way to detect when a lexer grammar is autogenerated from
	 *  another grammar or we are just sending in a string representing a
	 *  grammar.  We don't want to generate a .tokens file, for example,
	 *  in such cases.
	 */
	protected boolean builtFromString = false;

	/** Factored out the sanity checking code; delegate to it. */
	GrammarSanity sanity = new GrammarSanity(this);

	/** Useful for asking questions about target during analysis */
	Target target;

	/** Create a grammar from file name.  */
	public Grammar(Tool tool, String fileName, CompositeGrammar composite) {
		this.composite = composite;
		setTool(tool);
		setFileName(fileName);
		// ensure we have the composite set to something
		if ( composite.delegateGrammarTreeRoot==null ) {
			composite.setDelegationRoot(this);
		}
		STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate);
		lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar");
		target = CodeGenerator.loadLanguageTarget((String) getOption("language"));
	}

	/** Useful for when you are sure that you are not part of a composite
	 *  already.  Used in Interp/RandomPhrase and testing.
	 */
	public Grammar() { this((Tool)null); }

	public Grammar(Tool tool) {
		setTool(tool);
		builtFromString = true;
		composite = new CompositeGrammar(this);
		STGroup lexerGrammarSTG = new STGroupString(lexerGrammarTemplate);
		lexerGrammarST = lexerGrammarSTG.getInstanceOf("grammar");
		target = CodeGenerator.loadLanguageTarget((String)getOption("language"));
	}

	/** Used for testing; only useful on noncomposite grammars. */
	public Grammar(String grammarString)
			throws RecognitionException
	{
		this(null, grammarString);
	}

	/** Used for testing and Interp/RandomPhrase.  Only useful on
	 *  noncomposite grammars.
	 */
	public Grammar(Tool tool, String grammarString)
		throws RecognitionException
	{
		this(tool);
		setFileName("<string>");
		StringReader r = new StringReader(grammarString);
		parseAndBuildAST(r);
		composite.assignTokenTypes();
		//composite.translateLeftRecursiveRules();
		addRulesForSyntacticPredicates();
		composite.defineGrammarSymbols();
		//composite.createNFAs();
		checkNameSpaceAndActions();
	}

	public void setFileName(String fileName) {
		this.fileName = fileName;
	}

	public String getFileName() {
		return fileName;
	}

	public void setName(String name) {
		if ( name==null ) {
			return;
		}
		// don't error check autogenerated files (those with '__' in them)
		String saneFile = fileName.replace('\\', '/');
		int lastSlash = saneFile.lastIndexOf('/');
		String onlyFileName = saneFile.substring(lastSlash+1);
		if ( !builtFromString ) {
			int lastDot = onlyFileName.lastIndexOf('.');
			String onlyFileNameNoSuffix = null;
			if ( lastDot < 0 ) {
				ErrorManager.error(ErrorManager.MSG_FILENAME_EXTENSION_ERROR, fileName);
				onlyFileNameNoSuffix = onlyFileName+GRAMMAR_FILE_EXTENSION;
			}
			else {
				onlyFileNameNoSuffix = onlyFileName.substring(0,lastDot);
			}
			if ( !name.equals(onlyFileNameNoSuffix) ) {
				ErrorManager.error(ErrorManager.MSG_FILE_AND_GRAMMAR_NAME_DIFFER,
								   name,
								   fileName);
			}
		}
		this.name = name;
	}
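
	/* Illustrative example: for fileName "/tmp/T.g", setName("T") passes
	 * the check above, while setName("U") reports
	 * MSG_FILE_AND_GRAMMAR_NAME_DIFFER.
	 */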

	public void setGrammarContent(String grammarString) throws RecognitionException {
		StringReader r = new StringReader(grammarString);
		parseAndBuildAST(r);
		composite.assignTokenTypes();
		composite.defineGrammarSymbols();
	}

	public void parseAndBuildAST()
		throws IOException
	{
		FileReader fr = null;
		BufferedReader br = null;
		try {
			fr = new FileReader(fileName);
			br = new BufferedReader(fr);
			parseAndBuildAST(br);
			br.close();
			br = null;
		}
		finally {
			if ( br!=null ) {
				br.close();
			}
		}
	}

	public void parseAndBuildAST(Reader r) {
		// BUILD AST FROM GRAMMAR
		ANTLRLexer lexer;
		try {
			lexer = new ANTLRLexer(new ANTLRReaderStream(r));
		} catch (IOException e) {
			ErrorManager.internalError("unexpected stream error from parsing "+fileName, e);
			return;
		}

		lexer.setFileName(this.getFileName());
		tokenBuffer = new CommonTokenStream(lexer);
		ANTLRParser parser = ANTLRParser.createParser(tokenBuffer);
		parser.setFileName(this.getFileName());
		ANTLRParser.grammar__return result = null;
		try {
			result = parser.grammar_(this);
		}
		catch (RecognitionException re) {
			ErrorManager.internalError("unexpected parser recognition error from "+fileName, re);
		}

		dealWithTreeFilterMode(); // tree grammar and filter=true?

		if ( lexer.hasASTOperator && !buildAST() ) {
			Object value = getOption("output");
			if ( value == null ) {
				ErrorManager.grammarWarning(ErrorManager.MSG_REWRITE_OR_OP_WITH_NO_OUTPUT_OPTION,
											this, null);
				setOption("output", "AST", null);
			}
			else {
				ErrorManager.grammarError(ErrorManager.MSG_AST_OP_WITH_NON_AST_OUTPUT_OPTION,
										  this, null, value);
			}
		}

		if ( result!=null ) { // a recognition error leaves result null
			setGrammarTree((GrammarAST)result.getTree());
		}

		//if ( grammarTree!=null ) System.out.println("grammar tree: "+grammarTree.toStringTree());

		setFileName(lexer.getFileName()); // the lexer #src might change name
		// don't dereference the tree until we know it exists
		if ( grammarTree==null || grammarTree.findFirstType(ANTLRParser.RULE)==null ) {
			ErrorManager.error(ErrorManager.MSG_NO_RULES, getFileName());
			return;
		}
		grammarTree.setUnknownTokenBoundaries();
	}

	protected void dealWithTreeFilterMode() {
		Object filterMode = getOption("filter");
		if ( type==TREE_PARSER && filterMode!=null && filterMode.toString().equals("true") ) {
			// check for conflicting options
			// filter => backtrack=true
			// filter&&output=AST => rewrite=true
			// filter&&output!=AST => error
			// any deviation from valid option set is an error
			Object backtrack = getOption("backtrack");
			Object output = getOption("output");
			Object rewrite = getOption("rewrite");
			if ( backtrack!=null && !backtrack.toString().equals("true") ) {
				ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
								   "backtrack", backtrack);
			}
			if ( output!=null && !output.toString().equals("AST") ) {
				ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
								   "output", output);
				setOption("output", "", null);
			}
			if ( rewrite!=null && !rewrite.toString().equals("true") ) {
				ErrorManager.error(ErrorManager.MSG_CONFLICTING_OPTION_IN_TREE_FILTER,
								   "rewrite", rewrite);
			}
			// set options properly
			setOption("backtrack", "true", null);
			if ( output!=null && output.toString().equals("AST") ) {
				setOption("rewrite", "true", null);
			}
			// @synpredgate set to state.backtracking==1 by code gen when filter=true
			// superClass set in template target::treeParser
		}
	}

	public void translateLeftRecursiveRule(GrammarAST ruleAST) {
		//System.out.println(ruleAST.toStringTree());
		CommonTreeNodeStream input = new CommonTreeNodeStream(ruleAST);
		LeftRecursiveRuleAnalyzer leftRecursiveRuleWalker =
			new LeftRecursiveRuleAnalyzer(input, this, ruleAST.enclosingRuleName);
		boolean isLeftRec = false;
		try {
			//System.out.println("TESTING "+ruleAST.enclosingRuleName);
			isLeftRec = leftRecursiveRuleWalker.rec_rule(this);
		}
		catch (RecognitionException re) {
			ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE, re);
		}
		if ( !isLeftRec ) return;
		List<String> rules = new ArrayList<String>();
		rules.add( leftRecursiveRuleWalker.getArtificialPrecStartRule() );
		rules.add( leftRecursiveRuleWalker.getArtificialOpPrecRule() );
		rules.add( leftRecursiveRuleWalker.getArtificialPrimaryRule() );
		for (String r : rules) {
			GrammarAST t = parseArtificialRule(r);
			addRule(grammarTree, t);
			//System.out.println(t.toStringTree());
		}

		//precRuleInitCodeBlocks.add( precRuleWalker.getOpPrecJavaCode() );
	}

	public void defineGrammarSymbols() {
		if ( Tool.internalOption_PrintGrammarTree ) {
			System.out.println(grammarTree.toStringList());
		}

		// DEFINE RULES
		//System.out.println("### define "+name+" rules");
		DefineGrammarItemsWalker defineItemsWalker = new DefineGrammarItemsWalker(new CommonTreeNodeStream(getGrammarTree()));
		try {
			defineItemsWalker.grammar_(this);
		}
		catch (RecognitionException re) {
			ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
							   re);
		}
	}

	/** ANALYZE ACTIONS, LOOKING FOR LABEL AND ATTR REFS; sanity check */
	public void checkNameSpaceAndActions() {
		examineAllExecutableActions();
		checkAllRulesForUselessLabels();

		nameSpaceChecker.checkConflicts();
	}

	/** Many imports are illegal, such as importing a lexer into a tree grammar */
	public boolean validImport(Grammar delegate) {
		List<Integer> validDelegators = validDelegations.get(delegate.type);
		return validDelegators!=null && validDelegators.contains(this.type);
	}

	/** If the grammar is a combined grammar, return the text of the implicit
	 *  lexer grammar.
	 */
	public String getLexerGrammar() {
		if ( lexerGrammarST.getAttribute("literals")==null &&
			 lexerGrammarST.getAttribute("rules")==null )
		{
			// if no rules, return nothing
			return null;
		}
		lexerGrammarST.add("name", name);
		// if there are any actions set for lexer, pass them in
		if ( getActions().get("lexer")!=null ) {
			lexerGrammarST.add("actionNames",
							   getActions().get("lexer").keySet());
			lexerGrammarST.add("actions",
							   getActions().get("lexer").values());
		}
		// make sure generated grammar has the same options
		if ( options!=null ) {
			Iterator optionNames = options.keySet().iterator();
			while (optionNames.hasNext()) {
				String optionName = (String) optionNames.next();
				if ( !doNotCopyOptionsToLexer.contains(optionName) ) {
					Object value = options.get(optionName);
					lexerGrammarST.addAggr("options.{name,value}", optionName, value);
				}
			}
		}
		return lexerGrammarST.render();
	}

	public String getImplicitlyGeneratedLexerFileName() {
		return name+
			   IGNORE_STRING_IN_GRAMMAR_FILE_NAME +
			   LEXER_GRAMMAR_FILE_EXTENSION;
	}
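
	/* Illustrative example: for a combined grammar named "Foo", this
	 * yields "Foo__.g".
	 */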

	/** Get the name of the generated recognizer; may or may not be the same
	 *  as the grammar name.
	 *  Recognizer is TParser and TLexer from T if combined, else
	 *  just use T regardless of grammar type.
	 */
	public String getRecognizerName() {
		String suffix = "";
		List<Grammar> grammarsFromRootToMe = composite.getDelegators(this);
		//System.out.println("grammarsFromRootToMe="+grammarsFromRootToMe);
		String qualifiedName = name;
		if ( grammarsFromRootToMe!=null ) {
			StringBuffer buf = new StringBuffer();
			for (Grammar g : grammarsFromRootToMe) {
				buf.append(g.name);
				buf.append('_');
			}
			buf.append(name);
			qualifiedName = buf.toString();
		}
		if ( type==Grammar.COMBINED ||
			 (type==Grammar.LEXER && implicitLexer) )
		{
			suffix = Grammar.grammarTypeToFileNameSuffix[type];
		}
		return qualifiedName+suffix;
	}
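
	/* Illustrative examples: combined grammar T yields "TParser" (and
	 * its implicit lexer yields "TLexer"); a delegate grammar S imported
	 * into root grammar M yields the qualified name "M_S".
	 */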

	/** Parse a rule we add artificially that is a list of the other lexer
	 *  rules like this: "Tokens : ID | INT | SEMI ;"  nextToken() will invoke
	 *  this to set the current token.  Add char literals before
	 *  the rule references.
	 *
	 *  If in filter mode, we want every alt to backtrack and we need to
	 *  do k=1 to force the "first token def wins" rule.  Otherwise, the
	 *  longest-match rule comes into play with LL(*).
	 *
	 *  The ANTLRParser antlr.g file now invokes this when parsing a lexer
	 *  grammar, which I think is proper even though it peeks at the info
	 *  that later phases will (re)compute.  It gets a list of lexer rules
	 *  and builds a string representing the rule; then it creates a parser
	 *  and adds the resulting tree to the grammar's tree.
	 */
	public GrammarAST addArtificialMatchTokensRule(GrammarAST grammarAST,
												   List<String> ruleNames,
												   List<String> delegateNames,
												   boolean filterMode) {
		ST matchTokenRuleST = null;
		if ( filterMode ) {
			matchTokenRuleST = new ST(
					ARTIFICIAL_TOKENS_RULENAME+
					" options {k=1; backtrack=true;} : <rules; separator=\"|\">;");
		}
		else {
			matchTokenRuleST = new ST(
					ARTIFICIAL_TOKENS_RULENAME+" : <rules; separator=\"|\">;");
		}

		// Now add token rule references
		for (int i = 0; i < ruleNames.size(); i++) {
			String rname = ruleNames.get(i);
			matchTokenRuleST.add("rules", rname);
		}
		for (int i = 0; i < delegateNames.size(); i++) {
			String dname = delegateNames.get(i);
			matchTokenRuleST.add("rules", dname+".Tokens");
		}
		//System.out.println("tokens rule: "+matchTokenRuleST.toString());
		GrammarAST r = parseArtificialRule(matchTokenRuleST.render());
		addRule(grammarAST, r);
		//addRule((GrammarAST)parser.getAST());
		//return (GrammarAST)parser.getAST();
		return r;
	}
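
	/* Illustrative example: for ruleNames=[ID, INT, SEMI],
	 * delegateNames=[Sub], and filterMode=true, the artificial rule
	 * parsed above reads:
	 *
	 *   Tokens options {k=1; backtrack=true;} : ID|INT|SEMI|Sub.Tokens;
	 */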

	public GrammarAST parseArtificialRule(String ruleText) {
		ANTLRLexer lexer = new ANTLRLexer(new ANTLRStringStream(ruleText));
		ANTLRParser parser = ANTLRParser.createParser(new CommonTokenStream(lexer));
		parser.setGrammar(this);
		parser.setGrammarType(this.type);
		try {
			ANTLRParser.rule_return result = parser.rule();
			return (GrammarAST)result.getTree();
		}
		catch (Exception e) {
			ErrorManager.error(ErrorManager.MSG_ERROR_CREATING_ARTIFICIAL_RULE,
							   e);
			return null;
		}
	}

	public void addRule(GrammarAST grammarTree, GrammarAST t) {
		GrammarAST p = null;
		for (int i = 0; i < grammarTree.getChildCount(); i++ ) {
			p = (GrammarAST)grammarTree.getChild(i);
			if (p == null || p.getType() == ANTLRParser.RULE || p.getType() == ANTLRParser.PREC_RULE) {
				break;
			}
		}

		if (p != null) {
			grammarTree.addChild(t);
		}
	}

	/** for any syntactic predicates, we need to define rules for them; they will get
	 *  defined automatically like any other rule. :)
	 */
	protected List<GrammarAST> getArtificialRulesForSyntacticPredicates(LinkedHashMap<String,GrammarAST> nameToSynpredASTMap)
	{
		List<GrammarAST> rules = new ArrayList<GrammarAST>();
		if ( nameToSynpredASTMap==null ) {
			return rules;
		}
		boolean isLexer = grammarTree.getType()==ANTLRParser.LEXER_GRAMMAR;
		for (String synpredName : nameToSynpredASTMap.keySet()) {
			GrammarAST fragmentAST = nameToSynpredASTMap.get(synpredName);
			GrammarAST ruleAST =
				ANTLRParser.createSimpleRuleAST(synpredName,
												fragmentAST,
												isLexer);
			rules.add(ruleAST);
		}
		return rules;
	}

	public void addRulesForSyntacticPredicates() {
		// Get syn pred rules and add to existing tree
		List<GrammarAST> synpredRules =
			getArtificialRulesForSyntacticPredicates(nameToSynpredASTMap);
		for (int i = 0; i < synpredRules.size(); i++) {
			GrammarAST rAST = synpredRules.get(i);
			grammarTree.addChild(rAST);
		}
	}

	/** Walk the list of options, altering this Grammar object according
	 *  to any I recognize.
	protected void processOptions() {
		Iterator optionNames = options.keySet().iterator();
		while (optionNames.hasNext()) {
			String optionName = (String) optionNames.next();
			Object value = options.get(optionName);
			if ( optionName.equals("tokenVocab") ) {

			}
		}
	}
	 */

	/** Define all the rule begin/end NFAStates to solve forward reference
	 *  issues.  Critical for composite grammars too.
	 *  This is normally called on all root/delegates manually and then
	 *  buildNFA() is called afterwards, because the NFA construction needs
	 *  to see rule start/stop states from potentially every grammar; these
	 *  states must be created a priori.  Testing routines will often
	 *  just call buildNFA(), which forces a call to this method if not
	 *  done already. Works ONLY for single noncomposite grammars.
	 */
	public void createRuleStartAndStopNFAStates() {
		//System.out.println("### createRuleStartAndStopNFAStates "+getGrammarTypeString()+" grammar "+name+" NFAs");
		if ( nfa!=null ) {
			return;
		}
		nfa = new NFA(this);
		factory = new NFAFactory(nfa);

		Collection rules = getRules();
		for (Iterator itr = rules.iterator(); itr.hasNext();) {
			Rule r = (Rule) itr.next();
			String ruleName = r.name;
			NFAState ruleBeginState = factory.newState();
			ruleBeginState.setDescription("rule "+ruleName+" start");
			ruleBeginState.enclosingRule = r;
			r.startState = ruleBeginState;
			NFAState ruleEndState = factory.newState();
			ruleEndState.setDescription("rule "+ruleName+" end");
			ruleEndState.setAcceptState(true);
			ruleEndState.enclosingRule = r;
			r.stopState = ruleEndState;
		}
	}

	public void buildNFA() {
		if ( nfa==null ) {
			createRuleStartAndStopNFAStates();
		}
		if ( nfa.complete ) {
			// don't let it create more than once; has side-effects
			return;
		}
		//System.out.println("### build "+getGrammarTypeString()+" grammar "+name+" NFAs");
		if ( getRules().size()==0 ) {
			return;
		}

		CommonTreeNodeStream input = new CommonTreeNodeStream(getGrammarTree());
		TreeToNFAConverter nfaBuilder = new TreeToNFAConverter(input, this, nfa, factory);
		try {
			nfaBuilder.grammar_();
		}
		catch (RecognitionException re) {
			ErrorManager.error(ErrorManager.MSG_BAD_AST_STRUCTURE,
							   name,
							   re);
		}
		nfa.complete = true;
	}

	/** For each decision in this grammar, compute a single DFA using the
	 *  NFA states associated with the decision.  The DFA construction
	 *  determines whether or not the alternatives in the decision are
	 *  separable using a regular lookahead language.
	 *
	 *  Store the lookahead DFAs in the AST created from the user's grammar
	 *  so the code generator or whoever can easily access it.
	 *
	 *  This is a separate method because you might want to create a
	 *  Grammar without doing the expensive analysis.
	 */
	public void createLookaheadDFAs() {
		createLookaheadDFAs(true);
	}

	public void createLookaheadDFAs(boolean wackTempStructures) {
		if ( nfa==null ) {
			buildNFA();
		}

		// CHECK FOR LEFT RECURSION; make sure we can actually do analysis
		checkAllRulesForLeftRecursion();

		/*
		// was there a severe problem while sniffing the grammar?
		if ( ErrorManager.doNotAttemptAnalysis() ) {
			return;
		}
		*/

		long start = System.currentTimeMillis();

		//System.out.println("### create DFAs");
		int numDecisions = getNumberOfDecisions();
		if ( NFAToDFAConverter.SINGLE_THREADED_NFA_CONVERSION ) {
			for (int decision=1; decision<=numDecisions; decision++) {
				NFAState decisionStartState = getDecisionNFAStartState(decision);
				if ( leftRecursiveRules.contains(decisionStartState.enclosingRule) ) {
					// don't bother to process decisions within left recursive rules.
					if ( composite.watchNFAConversion ) {
						System.out.println("ignoring decision "+decision+
										   " within left-recursive rule "+decisionStartState.enclosingRule.name);
					}
					continue;
				}
				if ( !externalAnalysisAbort && decisionStartState.getNumberOfTransitions()>1 ) {
					Rule r = decisionStartState.enclosingRule;
					if ( r.isSynPred && !synPredNamesUsedInDFA.contains(r.name) ) {
						continue;
					}
					DFA dfa = null;
					// if k=* or k=1, try LL(1)
					if ( getUserMaxLookahead(decision)==0 ||
						 getUserMaxLookahead(decision)==1 )
					{
						dfa = createLL_1_LookaheadDFA(decision);
					}
					if ( dfa==null ) {
						if ( composite.watchNFAConversion ) {
							System.out.println("decision "+decision+
											   " not suitable for LL(1)-optimized DFA analysis");
						}
						dfa = createLookaheadDFA(decision, wackTempStructures);
					}
					if ( dfa.startState==null ) {
						// something went wrong; wipe out DFA
						setLookaheadDFA(decision, null);
					}
					if ( Tool.internalOption_PrintDFA ) {
						System.out.println("DFA d="+decision);
						FASerializer serializer = new FASerializer(nfa.grammar);
						String result = serializer.serialize(dfa.startState);
						System.out.println(result);
					}
				}
			}
		}
		else {
			ErrorManager.info("two-threaded DFA conversion");
			// create a barrier expecting the two DFA conversion threads
			// plus this main creation thread
			Barrier barrier = new Barrier(3);
			// assume 2 CPUs for now
			int midpoint = numDecisions/2;
			NFAConversionThread t1 =
				new NFAConversionThread(this, barrier, 1, midpoint);
			new Thread(t1).start();
			if ( midpoint == (numDecisions/2) ) {
				midpoint++;
			}
			NFAConversionThread t2 =
				new NFAConversionThread(this, barrier, midpoint, numDecisions);
			new Thread(t2).start();
			// wait for these two threads to finish
			try {
				barrier.waitForRelease();
			}
			catch(InterruptedException e) {
				ErrorManager.internalError("what the hell? DFA interruptus", e);
			}
		}

		long stop = System.currentTimeMillis();
		DFACreationWallClockTimeInMS = stop - start;

		// indicate that we've finished building DFA (even if #decisions==0)
		allDecisionDFACreated = true;
	}

	public DFA createLL_1_LookaheadDFA(int decision) {
		Decision d = getDecision(decision);
		String enclosingRule = d.startState.enclosingRule.name;
		Rule r = d.startState.enclosingRule;
		NFAState decisionStartState = getDecisionNFAStartState(decision);

		if ( composite.watchNFAConversion ) {
			System.out.println("--------------------\nattempting LL(1) DFA (d="
							   +decisionStartState.getDecisionNumber()+") for "+
							   decisionStartState.getDescription());
		}

		if ( r.isSynPred && !synPredNamesUsedInDFA.contains(enclosingRule) ) {
			return null;
		}

		// compute lookahead for each alt
		int numAlts = getNumberOfAltsForDecisionNFA(decisionStartState);
		LookaheadSet[] altLook = new LookaheadSet[numAlts+1];
		for (int alt = 1; alt <= numAlts; alt++) {
			int walkAlt =
				decisionStartState.translateDisplayAltToWalkAlt(alt);
			NFAState altLeftEdge = getNFAStateForAltOfDecision(decisionStartState, walkAlt);
			NFAState altStartState = (NFAState)altLeftEdge.transition[0].target;
			//System.out.println("alt "+alt+" start state = "+altStartState.stateNumber);
			altLook[alt] = ll1Analyzer.LOOK(altStartState);
			//System.out.println("alt "+alt+": "+altLook[alt].toString(this));
		}

		// compare alt i with alt j for disjointness
		boolean decisionIsLL_1 = true;
outer:
		for (int i = 1; i <= numAlts; i++) {
			for (int j = i+1; j <= numAlts; j++) {
				/*
				System.out.println("compare "+i+", "+j+": "+
								   altLook[i].toString(this)+" with "+
								   altLook[j].toString(this));
				*/
				LookaheadSet collision = altLook[i].intersection(altLook[j]);
				if ( !collision.isNil() ) {
					//System.out.println("collision (non-LL(1)): "+collision.toString(this));
					decisionIsLL_1 = false;
					break outer;
				}
			}
		}

		boolean foundConfoundingPredicate =
			ll1Analyzer.detectConfoundingPredicates(decisionStartState);
		if ( decisionIsLL_1 && !foundConfoundingPredicate ) {
			// build an LL(1) optimized DFA with edge for each altLook[i]
			if ( NFAToDFAConverter.debug ) {
				System.out.println("decision "+decision+" is simple LL(1)");
			}
			DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, altLook);
			setLookaheadDFA(decision, lookaheadDFA);
			updateLineColumnToLookaheadDFAMap(lookaheadDFA);
			return lookaheadDFA;
		}

		// Not LL(1), but perhaps we can solve it with a simplified predicate
		// search.  Even if k=1 was set manually, only resolve here if we
		// have preds.

		/*
		SemanticContext visiblePredicates =
			ll1Analyzer.getPredicates(decisionStartState);
		boolean foundConfoundingPredicate =
			ll1Analyzer.detectConfoundingPredicates(decisionStartState);
			*/

		// exit if not forced k=1 or we found a predicate situation we
		// can't handle: predicates in rules invoked from this decision.
		if ( getUserMaxLookahead(decision)!=1 || // not manually set to k=1
			 !getAutoBacktrackMode(decision) ||
			 foundConfoundingPredicate )
		{
			//System.out.println("trying LL(*)");
			return null;
		}

		List<IntervalSet> edges = new ArrayList<IntervalSet>();
		for (int i = 1; i < altLook.length; i++) {
			LookaheadSet s = altLook[i];
			edges.add((IntervalSet)s.tokenTypeSet);
		}
		List<IntervalSet> disjoint = makeEdgeSetsDisjoint(edges);
		//System.out.println("disjoint="+disjoint);

		MultiMap<IntervalSet, Integer> edgeMap = new MultiMap<IntervalSet, Integer>();
		for (int i = 0; i < disjoint.size(); i++) {
			IntervalSet ds = disjoint.get(i);
			for (int alt = 1; alt < altLook.length; alt++) {
				LookaheadSet look = altLook[alt];
				if ( !ds.and(look.tokenTypeSet).isNil() ) {
					edgeMap.map(ds, alt);
				}
			}
		}
		//System.out.println("edge map: "+edgeMap);

		// TODO: how do we know we covered stuff?

		// build an LL(1) optimized DFA with edge for each altLook[i]
		DFA lookaheadDFA = new LL1DFA(decision, decisionStartState, edgeMap);
		setLookaheadDFA(decision, lookaheadDFA);

		// create map from line:col to decision DFA (for ANTLRWorks)
		updateLineColumnToLookaheadDFAMap(lookaheadDFA);

		return lookaheadDFA;
	}

	private void updateLineColumnToLookaheadDFAMap(DFA lookaheadDFA) {
		GrammarAST decisionAST = nfa.grammar.getDecisionBlockAST(lookaheadDFA.decisionNumber);
		int line = decisionAST.getLine();
		int col = decisionAST.getCharPositionInLine();
		lineColumnToLookaheadDFAMap.put(line + ":" + col, lookaheadDFA);
	}
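
	/* Illustrative example: keys are "line:col" strings, so a decision
	 * block starting at line 15, column 8 is stored under "15:8".
	 */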

	protected List<IntervalSet> makeEdgeSetsDisjoint(List<IntervalSet> edges) {
		OrderedHashSet<IntervalSet> disjointSets = new OrderedHashSet<IntervalSet>();
		// walk each incoming edge label/set and add to disjoint set
		int numEdges = edges.size();
		for (int e = 0; e < numEdges; e++) {
			IntervalSet t = edges.get(e);
			if ( disjointSets.contains(t) ) { // exact set present
				continue;
			}

			// compare t with set i for disjointness
			IntervalSet remainder = t; // remainder starts out as whole set to add
			int numDisjointElements = disjointSets.size();
			for (int i = 0; i < numDisjointElements; i++) {
				IntervalSet s_i = (IntervalSet)disjointSets.get(i);

				if ( t.and(s_i).isNil() ) { // nothing in common
					continue;
				}
				//System.out.println(label+" collides with "+rl);

				// For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
				// (ignoring s_i-t if nil; don't put in list)

				// Replace existing s_i with intersection since we
				// know that will always be a non nil character class
				IntervalSet intersection = (IntervalSet)s_i.and(t);
				disjointSets.set(i, intersection);

				// Compute s_i-t to see what is in current set and not in incoming
				IntSet existingMinusNewElements = s_i.subtract(t);
				//System.out.println(s_i+"-"+t+"="+existingMinusNewElements);
				if ( !existingMinusNewElements.isNil() ) {
					// found a new character class; add to the end (doesn't affect
					// outer loop duration since n was computed a priori)
					disjointSets.add((IntervalSet)existingMinusNewElements);
				}

				// anything left to add to the reachableLabels?
				remainder = (IntervalSet)t.subtract(s_i);
				if ( remainder.isNil() ) {
					break; // nothing left to add to set.  done!
				}

				t = remainder;
			}
			if ( !remainder.isNil() ) {
				disjointSets.add(remainder);
			}
		}
		return disjointSets.elements();
	}
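
	/* Illustrative walk-through: given edges [{a,b}, {b,c}], the loop
	 * above first adds {a,b}; the second set collides, so {a,b} is
	 * replaced by the intersection {b}, the leftover {a} is appended,
	 * and the remainder {c} is appended last, yielding [{b}, {a}, {c}].
	 */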

	public DFA createLookaheadDFA(int decision, boolean wackTempStructures) {
		Decision d = getDecision(decision);
		String enclosingRule = d.startState.enclosingRule.name;
		Rule r = d.startState.enclosingRule;

		//System.out.println("createLookaheadDFA(): "+enclosingRule+" dec "+decision+"; synprednames prev used "+synPredNamesUsedInDFA);
		NFAState decisionStartState = getDecisionNFAStartState(decision);
		long startDFA=0,stopDFA=0;
		if ( composite.watchNFAConversion ) {
			System.out.println("--------------------\nbuilding lookahead DFA (d="
							   +decisionStartState.getDecisionNumber()+") for "+
							   decisionStartState.getDescription());
			startDFA = System.currentTimeMillis();
		}

		DFA lookaheadDFA = new DFA(decision, decisionStartState);
		// Retry to create a simpler DFA if analysis failed (non-LL(*),
		// recursion overflow, or time out).
		boolean failed =
			lookaheadDFA.probe.isNonLLStarDecision() ||
			lookaheadDFA.probe.analysisOverflowed();
		if ( failed && lookaheadDFA.okToRetryDFAWithK1() ) {
			// set k=1 option and try again.
			// First, clean up tracking stuff
			decisionsWhoseDFAsUsesSynPreds.remove(lookaheadDFA);
			// TODO: clean up synPredNamesUsedInDFA also (harder)
			d.blockAST.setBlockOption(this, "k", Utils.integer(1));
			if ( composite.watchNFAConversion ) {
				System.out.print("trying decision "+decision+
								 " again with k=1; reason: "+
								 lookaheadDFA.getReasonForFailure());
			}
			lookaheadDFA = null; // make sure other memory is "free" before redoing
			lookaheadDFA = new DFA(decision, decisionStartState);
		}

		setLookaheadDFA(decision, lookaheadDFA);

		if ( wackTempStructures ) {
			for (DFAState s : lookaheadDFA.getUniqueStates().values()) {
				s.reset();
			}
		}

		// create map from line:col to decision DFA (for ANTLRWorks)
		updateLineColumnToLookaheadDFAMap(lookaheadDFA);

		if ( composite.watchNFAConversion ) {
			stopDFA = System.currentTimeMillis();
			System.out.println("cost: "+lookaheadDFA.getNumberOfStates()+
							   " states, "+(int)(stopDFA-startDFA)+" ms");
		}
		//System.out.println("after create DFA; synPredNamesUsedInDFA="+synPredNamesUsedInDFA);
		return lookaheadDFA;
	}

	/** Terminate DFA creation (grammar analysis).
	 */
	public void externallyAbortNFAToDFAConversion() {
		externalAnalysisAbort = true;
	}

	public boolean NFAToDFAConversionExternallyAborted() {
		return externalAnalysisAbort;
	}

	/** Return a new unique integer in the token type space */
	public int getNewTokenType() {
		composite.maxTokenType++;
		return composite.maxTokenType;
	}

	/** Define a token at a particular token type value.  Blast an
	 *  old value with a new one.  This is called during normal grammar
	 *  processing and during import vocab operations to set tokens with
	 *  specific values.
	 */
	public void defineToken(String text, int tokenType) {
		//System.out.println("defineToken("+text+", "+tokenType+")");
		if ( composite.tokenIDToTypeMap.get(text)!=null ) {
			// already defined?  Must be predefined one like EOF;
			// do nothing
			return;
		}
		// the index in the typeToTokenList table is actually shifted to
		// hold faux labels as you cannot have negative indices.
		if ( text.charAt(0)=='\'' ) {
			composite.stringLiteralToTypeMap.put(text, Utils.integer(tokenType));
			// track in reverse index too
			if ( tokenType>=composite.typeToStringLiteralList.size() ) {
				composite.typeToStringLiteralList.setSize(tokenType+1);
			}
			composite.typeToStringLiteralList.set(tokenType, text);
		}
		else { // must be a label like ID
			composite.tokenIDToTypeMap.put(text, Utils.integer(tokenType));
		}
		int index = Label.NUM_FAUX_LABELS+tokenType-1;
		//System.out.println("defining "+name+" token "+text+" at type="+tokenType+", index="+index);
		composite.maxTokenType = Math.max(composite.maxTokenType, tokenType);
		if ( index>=composite.typeToTokenList.size() ) {
			composite.typeToTokenList.setSize(index+1);
		}
		String prevToken = (String)composite.typeToTokenList.get(index);
		if ( prevToken==null || prevToken.charAt(0)=='\'' ) {
			// only record if nothing there before or if thing before was a literal
			composite.typeToTokenList.set(index, text);
		}
	}
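
	/* Illustrative example (hypothetical token type): with
	 * Label.NUM_FAUX_LABELS faux entries at the front of typeToTokenList,
	 * a token defined at type t lands at index NUM_FAUX_LABELS + t - 1;
	 * e.g. defineToken("ID", 4) stores "ID" at NUM_FAUX_LABELS + 3.
	 */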

	/** Define a new rule.  A new rule index is created by incrementing
	 *  ruleIndex.
	 */
	public void defineRule(Token ruleToken,
						   String modifier,
						   Map options,
						   GrammarAST tree,
						   GrammarAST argActionAST,
						   int numAlts)
	{
		String ruleName = ruleToken.getText();
		if ( getLocallyDefinedRule(ruleName)!=null ) {
			ErrorManager.grammarError(ErrorManager.MSG_RULE_REDEFINITION,
									  this, ruleToken, ruleName);
			return;
		}

		if ( (type==Grammar.PARSER||type==Grammar.TREE_PARSER) &&
			 Character.isUpperCase(ruleName.charAt(0)) )
		{
			ErrorManager.grammarError(ErrorManager.MSG_LEXER_RULES_NOT_ALLOWED,
									  this, ruleToken, ruleName);
			return;
		}

		Rule r = new Rule(this, ruleName, composite.ruleIndex, numAlts);
		/*
		System.out.println("defineRule("+ruleName+",modifier="+modifier+
						   "): index="+r.index+", nalts="+numAlts);
		*/
		r.modifier = modifier;
		nameToRuleMap.put(ruleName, r);
		setRuleAST(ruleName, tree);
		r.setOptions(options, ruleToken);
		r.argActionAST = argActionAST;
		composite.ruleIndexToRuleList.setSize(composite.ruleIndex+1);
		composite.ruleIndexToRuleList.set(composite.ruleIndex, r);
		composite.ruleIndex++;
		if ( ruleName.startsWith(SYNPRED_RULE_PREFIX) ) {
			r.isSynPred = true;
		}
	}

1451 	/** Define a new predicate and get back its name for use in building
1452 	 *  a semantic predicate reference to the syn pred.
1453 	 */
1454 	public String defineSyntacticPredicate(GrammarAST blockAST,
1455 										   String currentRuleName)
1456 	{
1457 		if ( nameToSynpredASTMap==null ) {
1458 			nameToSynpredASTMap = new LinkedHashMap();
1459 		}
1460 		String predName =
1461 			SYNPRED_RULE_PREFIX+(nameToSynpredASTMap.size() + 1)+"_"+name;
1462 		blockAST.setTreeEnclosingRuleNameDeeply(predName);
1463 		nameToSynpredASTMap.put(predName, blockAST);
1464 		return predName;
1465 	}
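	/* Sketch of the names produced above: in a grammar called "Expr" (a
	 * hypothetical name), successive calls return "synpred1_Expr",
	 * "synpred2_Expr", ..., since the counter is nameToSynpredASTMap.size()+1.
	 */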
1466 
1467 	public LinkedHashMap getSyntacticPredicates() {
1468 		return nameToSynpredASTMap;
1469 	}
1470 
1471 	public GrammarAST getSyntacticPredicate(String name) {
1472 		if ( nameToSynpredASTMap==null ) {
1473 			return null;
1474 		}
1475 		return (GrammarAST)nameToSynpredASTMap.get(name);
1476 	}
1477 
1478 	public void synPredUsedInDFA(DFA dfa, SemanticContext semCtx) {
1479 		decisionsWhoseDFAsUsesSynPreds.add(dfa);
1480 		semCtx.trackUseOfSyntacticPredicates(this); // walk ctx looking for preds
1481 	}
1482 
1483 	/*
1484 	public Set<Rule> getRuleNamesVisitedDuringLOOK() {
1485 		return rulesSensitiveToOtherRules;
1486 	}
1487 	*/
1488 
1489 	/** Given @scope::name {action} define it for this grammar.  Later,
1490 	 *  the code generator will ask for the actions table.  For composite
1491      *  grammars, make sure header action propagates down to all delegates.
1492 	 */
1493 	public void defineNamedAction(GrammarAST ampersandAST,
1494 								  String scope,
1495 								  GrammarAST nameAST,
1496 								  GrammarAST actionAST)
1497 	{
1498 		if ( scope==null ) {
1499 			scope = getDefaultActionScope(type);
1500 		}
1501 		//System.out.println("Grammar "+name+" define @"+scope+"::"+nameAST.getText()+"{"+actionAST.getText()+"}");
1502 		String actionName = nameAST.getText();
1503 		Map<String, Object> scopeActions = getActions().get(scope);
1504 		if ( scopeActions==null ) {
1505 			scopeActions = new HashMap<String, Object>();
1506 			getActions().put(scope, scopeActions);
1507 		}
1508 		Object a = scopeActions.get(actionName);
1509 		if ( a!=null ) {
1510 			ErrorManager.grammarError(
1511 				ErrorManager.MSG_ACTION_REDEFINITION,this,
1512 				nameAST.getToken(),nameAST.getText());
1513 		}
1514 		else {
1515 			scopeActions.put(actionName,actionAST);
1516 		}
1517         // propagate header regardless of scope (lexer, parser, ...)?
1518         if ( this==composite.getRootGrammar() && actionName.equals("header") ) {
1519             List<Grammar> allgrammars = composite.getRootGrammar().getDelegates();
1520             for (Grammar delegate : allgrammars) {
1521 				if ( target.isValidActionScope(delegate.type, scope) ) {
1522 					//System.out.println("propagate to "+delegate.name);
1523                 	delegate.defineNamedAction(ampersandAST, scope, nameAST, actionAST);
1524 				}
1525             }
1526         }
1527     }
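    /* For example (illustrative grammar action, not in the original source),
     * a named action such as
     *
     *   @parser::header { package com.example; }
     *
     * arrives here with scope="parser" and name="header"; an unscoped
     * @members in a combined grammar gets the default "parser" scope from
     * getDefaultActionScope().  A "header" defined on the root grammar is
     * then pushed down to every delegate whose type accepts the scope.
     */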
1528 
1529     public void setSynPredGateIfNotAlready(ST gateST) {
1530         String scope = getDefaultActionScope(type);
1531         Map<String, Object> actionsForGrammarScope = getActions().get(scope);
1532         // if no synpredgate action set by user then set
1533         if ( (actionsForGrammarScope==null ||
1534              !actionsForGrammarScope.containsKey(Grammar.SYNPREDGATE_ACTION_NAME)) )
1535         {
1536             if ( actionsForGrammarScope==null ) {
1537                 actionsForGrammarScope=new HashMap<String, Object>();
1538                 getActions().put(scope, actionsForGrammarScope);
1539             }
1540             actionsForGrammarScope.put(Grammar.SYNPREDGATE_ACTION_NAME,
1541                                        gateST);
1542         }
1543     }
1544 
1545 	public Map<String, Map<String, Object>> getActions() {
1546 		return actions;
1547 	}
1548 
1549 	/** Given a grammar type, what should be the default action scope?
1550 	 *  If I say @members in a COMBINED grammar, for example, the
1551 	 *  default scope should be "parser".
1552 	 */
1553 	public String getDefaultActionScope(int grammarType) {
1554 		switch (grammarType) {
1555 			case Grammar.LEXER :
1556 				return "lexer";
1557 			case Grammar.PARSER :
1558 			case Grammar.COMBINED :
1559 				return "parser";
1560 			case Grammar.TREE_PARSER :
1561 				return "treeparser";
1562 		}
1563 		return null;
1564 	}
1565 
1566 	public void defineLexerRuleFoundInParser(Token ruleToken,
1567 											 GrammarAST ruleAST)
1568 	{
1569 //		System.out.println("rule tree is:\n"+ruleAST.toStringTree());
1570 		/*
1571 		String ruleText = tokenBuffer.toOriginalString(ruleAST.ruleStartTokenIndex,
1572 											   ruleAST.ruleStopTokenIndex);
1573 		*/
1574 		// first, create the text of the rule
1575 		StringBuffer buf = new StringBuffer();
1576 		buf.append("// $ANTLR src \"");
1577 		buf.append(getFileName());
1578 		buf.append("\" ");
1579 		buf.append(ruleAST.getLine());
1580 		buf.append("\n");
1581 		for (int i=ruleAST.getTokenStartIndex();
1582 			 i<=ruleAST.getTokenStopIndex() && i<tokenBuffer.size();
1583 			 i++)
1584 		{
1585 			CommonToken t = (CommonToken)tokenBuffer.get(i);
1586 			// undo the text deletions done by the lexer (ugh)
1587 			if ( t.getType()==ANTLRParser.BLOCK ) {
1588 				buf.append("(");
1589 			}
1590 			else if ( t.getType()==ANTLRParser.ACTION ) {
1591 				buf.append("{");
1592 				buf.append(t.getText());
1593 				buf.append("}");
1594 			}
1595 			else if ( t.getType()==ANTLRParser.SEMPRED ||
1596 					  t.getType()==ANTLRParser.SYN_SEMPRED ||
1597 					  t.getType()==ANTLRParser.GATED_SEMPRED ||
1598 					  t.getType()==ANTLRParser.BACKTRACK_SEMPRED )
1599 			{
1600 				buf.append("{");
1601 				buf.append(t.getText());
1602 				buf.append("}?");
1603 			}
1604 			else if ( t.getType()==ANTLRParser.ARG_ACTION ) {
1605 				buf.append("[");
1606 				buf.append(t.getText());
1607 				buf.append("]");
1608 			}
1609 			else {
1610 				buf.append(t.getText());
1611 			}
1612 		}
1613 		String ruleText = buf.toString();
1614 		//System.out.println("[["+ruleText+"]]");
1615 		// now put the rule into the lexer grammar template
1616 		if ( getGrammarIsRoot() ) { // don't build lexers for delegates
1617 			lexerGrammarST.add("rules", ruleText);
1618 		}
1619 		// track this lexer rule's name
1620 		composite.lexerRules.add(ruleToken.getText());
1621 	}
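	/* The buffer built above starts with a source-mapping comment; e.g. a
	 * lexer rule beginning on line 42 of T.g (hypothetical file and line)
	 * is emitted into the generated lexer grammar as:
	 *
	 *   // $ANTLR src "T.g" 42
	 *   ID : ('a'..'z')+ ;
	 *
	 * which lets later stages point errors back at the combined grammar.
	 */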
1622 
1623 	/** If someone does PLUS='+' in the parser, must make sure we get
1624 	 *  "PLUS : '+' ;" in the lexer, not "T73 : '+';"
1625 	 */
1626 	public void defineLexerRuleForAliasedStringLiteral(String tokenID,
1627 													   String literal,
1628 													   int tokenType)
1629 	{
1630 		if ( getGrammarIsRoot() ) { // don't build lexers for delegates
1631 			//System.out.println("defineLexerRuleForAliasedStringLiteral: "+literal+" "+tokenType);
1632 			lexerGrammarST.addAggr("literals.{ruleName,type,literal}",
1633 										tokenID,
1634 										Utils.integer(tokenType),
1635 										literal);
1636 		}
1637 		// track this lexer rule's name
1638 		composite.lexerRules.add(tokenID);
1639 	}
1640 
1641 	public void defineLexerRuleForStringLiteral(String literal, int tokenType) {
1642 		//System.out.println("defineLexerRuleForStringLiteral: "+literal+" "+tokenType);
1643 		// compute new token name like T237 and define it as having tokenType
1644 		String tokenID = computeTokenNameFromLiteral(tokenType,literal);
1645 		defineToken(tokenID, tokenType);
1646 		// tell implicit lexer to define a rule to match the literal
1647 		if ( getGrammarIsRoot() ) { // don't build lexers for delegates
1648 			lexerGrammarST.addAggr("literals.{ruleName,type,literal}",
1649 										tokenID,
1650 										Utils.integer(tokenType),
1651 										literal);
1652 		}
1653 	}
1654 
1655 	public Rule getLocallyDefinedRule(String ruleName) {
1656 		Rule r = nameToRuleMap.get(ruleName);
1657 		return r;
1658 	}
1659 
1660 	public Rule getRule(String ruleName) {
1661 		Rule r = composite.getRule(ruleName);
1662 		/*
1663 		if ( r!=null && r.grammar != this ) {
1664 			System.out.println(name+".getRule("+ruleName+")="+r);
1665 		}
1666 		*/
1667 		return r;
1668 	}
1669 
1670 	public Rule getRule(String scopeName, String ruleName) {
1671 		if ( scopeName!=null ) { // scope override
1672 			Grammar scope = composite.getGrammar(scopeName);
1673 			if ( scope==null ) {
1674 				return null;
1675 			}
1676 			return scope.getLocallyDefinedRule(ruleName);
1677 		}
1678 		return getRule(ruleName);
1679 	}
1680 
1681 	public int getRuleIndex(String scopeName, String ruleName) {
1682 		Rule r = getRule(scopeName, ruleName);
1683 		if ( r!=null ) {
1684 			return r.index;
1685 		}
1686 		return INVALID_RULE_INDEX;
1687 	}
1688 
1689 	public int getRuleIndex(String ruleName) {
1690 		return getRuleIndex(null, ruleName);
1691 	}
1692 
1693 	public String getRuleName(int ruleIndex) {
1694 		Rule r = composite.ruleIndexToRuleList.get(ruleIndex);
1695 		if ( r!=null ) {
1696 			return r.name;
1697 		}
1698 		return null;
1699 	}
1700 
1701 	/** Should codegen.g gen rule for ruleName?
1702 	 * 	If synpred, only gen if used in a DFA.
1703 	 *  If regular rule, only gen if not overridden in delegator
1704 	 *  Always gen Tokens rule though.
1705 	 */
1706 	public boolean generateMethodForRule(String ruleName) {
1707 		if ( ruleName.equals(ARTIFICIAL_TOKENS_RULENAME) ) {
1708 			// always generate Tokens rule to satisfy lexer interface
1709 			// but it may have no alternatives.
1710 			return true;
1711 		}
1712 		if ( overriddenRules.contains(ruleName) ) {
1713 			// don't generate any overridden rules
1714 			return false;
1715 		}
1716 		// generate if non-synpred or synpred used in a DFA
1717 		Rule r = getLocallyDefinedRule(ruleName);
1718 		return !r.isSynPred ||
1719 			   (r.isSynPred&&synPredNamesUsedInDFA.contains(ruleName));
1720 	}
1721 
1722 	public AttributeScope defineGlobalScope(String name, Token scopeAction) {
1723 		AttributeScope scope = new AttributeScope(this, name, scopeAction);
1724 		scopes.put(name,scope);
1725 		return scope;
1726 	}
1727 
1728 	public AttributeScope createReturnScope(String ruleName, Token retAction) {
1729 		AttributeScope scope = new AttributeScope(this, ruleName, retAction);
1730 		scope.isReturnScope = true;
1731 		return scope;
1732 	}
1733 
1734 	public AttributeScope createRuleScope(String ruleName, Token scopeAction) {
1735 		AttributeScope scope = new AttributeScope(this, ruleName, scopeAction);
1736 		scope.isDynamicRuleScope = true;
1737 		return scope;
1738 	}
1739 
1740 	public AttributeScope createParameterScope(String ruleName, Token argAction) {
1741 		AttributeScope scope = new AttributeScope(this, ruleName, argAction);
1742 		scope.isParameterScope = true;
1743 		return scope;
1744 	}
1745 
1746 	/** Get a global scope */
1747 	public AttributeScope getGlobalScope(String name) {
1748 		return (AttributeScope)scopes.get(name);
1749 	}
1750 
1751 	public Map getGlobalScopes() {
1752 		return scopes;
1753 	}
1754 
1755 	/** Define a label defined in a rule r; check the validity then ask the
1756 	 *  Rule object to actually define it.
1757 	 */
1758 	protected void defineLabel(Rule r, Token label, GrammarAST element, int type) {
1759 		boolean err = nameSpaceChecker.checkForLabelTypeMismatch(r, label, type);
1760 		if ( err ) {
1761 			return;
1762 		}
1763 		r.defineLabel(label, element, type);
1764 	}
1765 
1766 	public void defineTokenRefLabel(String ruleName,
1767 									Token label,
1768 									GrammarAST tokenRef)
1769 	{
1770 		Rule r = getLocallyDefinedRule(ruleName);
1771 		if ( r!=null ) {
1772 			if ( type==LEXER &&
1773 				 (tokenRef.getType()==ANTLRParser.CHAR_LITERAL||
1774 				  tokenRef.getType()==ANTLRParser.BLOCK||
1775 				  tokenRef.getType()==ANTLRParser.NOT||
1776 				  tokenRef.getType()==ANTLRParser.CHAR_RANGE||
1777 				  tokenRef.getType()==ANTLRParser.WILDCARD))
1778 			{
1779 				defineLabel(r, label, tokenRef, CHAR_LABEL);
1780 			}
1781             else {
1782 				defineLabel(r, label, tokenRef, TOKEN_LABEL);
1783 			}
1784 		}
1785 	}
1786 
1787     public void defineWildcardTreeLabel(String ruleName,
1788                                            Token label,
1789                                            GrammarAST tokenRef)
1790     {
1791         Rule r = getLocallyDefinedRule(ruleName);
1792         if ( r!=null ) {
1793             defineLabel(r, label, tokenRef, WILDCARD_TREE_LABEL);
1794         }
1795     }
1796 
1797     public void defineWildcardTreeListLabel(String ruleName,
1798                                            Token label,
1799                                            GrammarAST tokenRef)
1800     {
1801         Rule r = getLocallyDefinedRule(ruleName);
1802         if ( r!=null ) {
1803             defineLabel(r, label, tokenRef, WILDCARD_TREE_LIST_LABEL);
1804         }
1805     }
1806 
1807     public void defineRuleRefLabel(String ruleName,
1808 								   Token label,
1809 								   GrammarAST ruleRef)
1810 	{
1811 		Rule r = getLocallyDefinedRule(ruleName);
1812 		if ( r!=null ) {
1813 			defineLabel(r, label, ruleRef, RULE_LABEL);
1814 		}
1815 	}
1816 
1817 	public void defineTokenListLabel(String ruleName,
1818 									 Token label,
1819 									 GrammarAST element)
1820 	{
1821 		Rule r = getLocallyDefinedRule(ruleName);
1822 		if ( r!=null ) {
1823 			defineLabel(r, label, element, TOKEN_LIST_LABEL);
1824 		}
1825 	}
1826 
1827 	public void defineRuleListLabel(String ruleName,
1828 									Token label,
1829 									GrammarAST element)
1830 	{
1831 		Rule r = getLocallyDefinedRule(ruleName);
1832 		if ( r!=null ) {
1833 			if ( !r.getHasMultipleReturnValues() ) {
1834 				ErrorManager.grammarError(
1835 					ErrorManager.MSG_LIST_LABEL_INVALID_UNLESS_RETVAL_STRUCT,this,
1836 					label,label.getText());
1837 			}
1838 			defineLabel(r, label, element, RULE_LIST_LABEL);
1839 		}
1840 	}
1841 
1842 	/** Given a set of all rewrite elements on right of ->, filter for
1843 	 *  label types such as Grammar.TOKEN_LABEL, Grammar.TOKEN_LIST_LABEL, ...
1844 	 *  Return a displayable token type name computed from the GrammarAST.
1845 	 */
1846 	public Set<String> getLabels(Set<GrammarAST> rewriteElements, int labelType) {
1847 		Set<String> labels = new HashSet<String>();
1848 		for (GrammarAST el : rewriteElements) {
1849 			if ( el.getType()==ANTLRParser.LABEL ) {
1850 				String labelName = el.getText();
1851 				Rule enclosingRule = getLocallyDefinedRule(el.enclosingRuleName);
1852 				if ( enclosingRule==null ) continue;
1853 				LabelElementPair pair = enclosingRule.getLabel(labelName);
1854                 /*
1855                 // if tree grammar and we have a wildcard, only notice it
1856                 // when looking for rule labels not token label. x=. should
1857                 // look like a rule ref since could be subtree.
1858                 if ( type==TREE_PARSER && pair!=null &&
1859                      pair.elementRef.getType()==ANTLRParser.WILDCARD )
1860                 {
1861                     if ( labelType==WILDCARD_TREE_LABEL ) {
1862                         labels.add(labelName);
1863                         continue;
1864                     }
1865                     else continue;
1866                 }
1867                  */
1868                 // if valid label and type is what we're looking for
1869 				// and not ref to old value val $rule, add to list
1870 				if ( pair!=null && pair.type==labelType &&
1871 					 !labelName.equals(el.enclosingRuleName) )
1872 				{
1873 					labels.add(labelName);
1874 				}
1875 			}
1876 		}
1877 		return labels;
1878 	}
1879 
1880 	/** Before generating code, we examine all actions that can have
1881 	 *  $x.y and $y stuff in them because some code generation depends on
1882 	 *  Rule.referencedPredefinedRuleAttributes.  I need to remove unused
1883 	 *  rule labels for example.
1884 	 */
1885 	protected void examineAllExecutableActions() {
1886 		Collection rules = getRules();
1887 		for (Iterator it = rules.iterator(); it.hasNext();) {
1888 			Rule r = (Rule) it.next();
1889 			// walk all actions within the rule elements, args, and exceptions
1890 			List<GrammarAST> actions = r.getInlineActions();
1891 			for (int i = 0; i < actions.size(); i++) {
1892 				GrammarAST actionAST = (GrammarAST) actions.get(i);
1893 				ActionAnalysis sniffer =
1894 					new ActionAnalysis(this, r.name, actionAST);
1895 				sniffer.analyze();
1896 			}
1897 			// walk any named actions like @init, @after
1898 			Collection<GrammarAST> namedActions = r.getActions().values();
1899 			for (Iterator it2 = namedActions.iterator(); it2.hasNext();) {
1900 				GrammarAST actionAST = (GrammarAST) it2.next();
1901 				ActionAnalysis sniffer =
1902 					new ActionAnalysis(this, r.name, actionAST);
1903 				sniffer.analyze();
1904 			}
1905 		}
1906 	}
1907 
1908 	/** Remove all labels on rule refs whose target rules have no return value.
1909 	 *  Do this for all rules in grammar.
1910 	 */
1911 	public void checkAllRulesForUselessLabels() {
1912 		if ( type==LEXER ) {
1913 			return;
1914 		}
1915 		Set rules = nameToRuleMap.keySet();
1916 		for (Iterator it = rules.iterator(); it.hasNext();) {
1917 			String ruleName = (String) it.next();
1918 			Rule r = getRule(ruleName);
1919 			removeUselessLabels(r.getRuleLabels());
1920 			removeUselessLabels(r.getRuleListLabels());
1921 		}
1922 	}
1923 
1924 	/** A label on a rule is useless if the rule has no return value, no
1925 	 *  tree or template output, and it is not referenced in an action.
1926 	 */
1927 	protected void removeUselessLabels(Map ruleToElementLabelPairMap) {
1928 		if ( ruleToElementLabelPairMap==null ) {
1929 			return;
1930 		}
1931 		Collection labels = ruleToElementLabelPairMap.values();
1932 		List kill = new ArrayList();
1933 		for (Iterator labelit = labels.iterator(); labelit.hasNext();) {
1934 			LabelElementPair pair = (LabelElementPair) labelit.next();
1935 			Rule refdRule = getRule(pair.elementRef.getText());
1936 			if ( refdRule!=null && !refdRule.getHasReturnValue() && !pair.actionReferencesLabel ) {
1937 				//System.out.println(pair.label.getText()+" is useless");
1938 				kill.add(pair.label.getText());
1939 			}
1940 		}
1941 		for (int i = 0; i < kill.size(); i++) {
1942 			String labelToKill = (String) kill.get(i);
1943 			// System.out.println("kill "+labelToKill);
1944 			ruleToElementLabelPairMap.remove(labelToKill);
1945 		}
1946 	}
1947 
1948 	/** Track a rule reference within an outermost alt of a rule.  Used
1949 	 *  at the moment to decide if $ruleref refers to a unique rule ref in
1950 	 *  the alt.  Rewrite rules force tracking of all rule AST results.
1951 	 *
1952 	 *  This data is also used to verify that all rules have been defined.
1953 	 */
1954 	public void altReferencesRule(String enclosingRuleName,
1955 								  GrammarAST refScopeAST,
1956 								  GrammarAST refAST,
1957 								  int outerAltNum)
1958 	{
1959 		/* Do nothing for now; not sure it's needed; track S.x as x
1960 		String scope = null;
1961 		Grammar scopeG = null;
1962 		if ( refScopeAST!=null ) {
1963 			if ( !scopedRuleRefs.contains(refScopeAST) ) {
1964 				scopedRuleRefs.add(refScopeAST);
1965 			}
1966 			scope = refScopeAST.getText();
1967 		}
1968 		*/
1969 		Rule r = getRule(enclosingRuleName);
1970 		if ( r==null ) {
1971 			return; // no error here; see NameSpaceChecker
1972 		}
1973 		r.trackRuleReferenceInAlt(refAST, outerAltNum);
1974 		Token refToken = refAST.getToken();
1975 		if ( !ruleRefs.contains(refAST) ) {
1976 			ruleRefs.add(refAST);
1977 		}
1978 	}
1979 
1980 	/** Track a token reference within an outermost alt of a rule.  Used
1981 	 *  to decide if $tokenref refers to a unique token ref in
1982 	 *  the alt. Does not track literals!
1983 	 *
1984 	 *  Rewrite rules force tracking of all tokens.
1985 	 */
1986 	public void altReferencesTokenID(String ruleName, GrammarAST refAST, int outerAltNum) {
1987 		Rule r = getLocallyDefinedRule(ruleName);
1988 		if ( r==null ) {
1989 			return;
1990 		}
1991 		r.trackTokenReferenceInAlt(refAST, outerAltNum);
1992 		if ( !tokenIDRefs.contains(refAST.getToken()) ) {
1993 			tokenIDRefs.add(refAST.getToken());
1994 		}
1995 	}
1996 
1997 	/** To yield smaller, more readable code, track which rules have their
1998 	 *  predefined attributes accessed.  If the rule has no user-defined
1999 	 *  return values, then don't generate the return value scope classes
2000 	 *  etc...  Make the rule have void return value.  Don't track for lexer
2001 	 *  rules.
2002 	 */
2003 	public void referenceRuleLabelPredefinedAttribute(String ruleName) {
2004 		Rule r = getRule(ruleName);
2005 		if ( r!=null && type!=LEXER ) {
2006 			// indicate that an action ref'd an attr unless it's in a lexer
2007 			// so that $ID.text refs don't force lexer rules to define
2008 			// return values...Token objects are created by the caller instead.
2009 			r.referencedPredefinedRuleAttributes = true;
2010 		}
2011 	}
2012 
2013 	public List checkAllRulesForLeftRecursion() {
2014 		return sanity.checkAllRulesForLeftRecursion();
2015 	}
2016 
2017 	/** Return a list of left-recursive rules; no analysis can be done
2018 	 *  successfully on these.  Useful for skipping these rules, and also
2019 	 *  for ANTLRWorks to highlight them.
2020 	 */
2021 	public Set<Rule> getLeftRecursiveRules() {
2022 		if ( nfa==null ) {
2023 			buildNFA();
2024 		}
2025 		if ( leftRecursiveRules!=null ) {
2026 			return leftRecursiveRules;
2027 		}
2028 		sanity.checkAllRulesForLeftRecursion();
2029 		return leftRecursiveRules;
2030 	}
2031 
2032 	public void checkRuleReference(GrammarAST scopeAST,
2033 								   GrammarAST refAST,
2034 								   GrammarAST argsAST,
2035 								   String currentRuleName)
2036 	{
2037 		sanity.checkRuleReference(scopeAST, refAST, argsAST, currentRuleName);
2038 	}
2039 
2040 	/** Rules like "a : ;" and "a : {...} ;" should not generate
2041 	 *  try/catch blocks for RecognitionException.  To detect this
2042 	 *  it's probably ok to just look for any reference to an atom
2043 	 *  that can match some input.  Without that, the rule is unlikely to have
2044 	 *  anything else.
2045 	 */
2046 	public boolean isEmptyRule(GrammarAST block) {
2047 		GrammarAST aTokenRefNode =
2048 			block.findFirstType(ANTLRParser.TOKEN_REF);
2049 		GrammarAST aStringLiteralRefNode =
2050 			block.findFirstType(ANTLRParser.STRING_LITERAL);
2051 		GrammarAST aCharLiteralRefNode =
2052 			block.findFirstType(ANTLRParser.CHAR_LITERAL);
2053 		GrammarAST aWildcardRefNode =
2054 			block.findFirstType(ANTLRParser.WILDCARD);
2055 		GrammarAST aRuleRefNode =
2056 			block.findFirstType(ANTLRParser.RULE_REF);
2057 		if ( aTokenRefNode==null&&
2058 			 aStringLiteralRefNode==null&&
2059 			 aCharLiteralRefNode==null&&
2060 			 aWildcardRefNode==null&&
2061 			 aRuleRefNode==null )
2062 		{
2063 			return true;
2064 		}
2065 		return false;
2066 	}
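	/* Per the checks above: "a : ;" and "a : {...} ;" are empty (no token,
	 * literal, wildcard, or rule reference anywhere in the block), while
	 * "a : ID ;" or "a : b ;" are not.
	 */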
2067 
2068 	public boolean isAtomTokenType(int ttype) {
2069 		return ttype == ANTLRParser.WILDCARD||
2070 			   ttype == ANTLRParser.CHAR_LITERAL||
2071 			   ttype == ANTLRParser.CHAR_RANGE||
2072 			   ttype == ANTLRParser.STRING_LITERAL||
2073 			   ttype == ANTLRParser.NOT||
2074 			   (type != LEXER && ttype == ANTLRParser.TOKEN_REF);
2075 	}
2076 
2077 	public int getTokenType(String tokenName) {
2078 		Integer I = null;
2079 		if ( tokenName.charAt(0)=='\'') {
2080 			I = (Integer)composite.stringLiteralToTypeMap.get(tokenName);
2081 		}
2082 		else { // must be a label like ID
2083 			I = (Integer)composite.tokenIDToTypeMap.get(tokenName);
2084 		}
2085 		int i = (I!=null)?I.intValue():Label.INVALID;
2086 		//System.out.println("grammar type "+type+" "+tokenName+"->"+i);
2087 		return i;
2088 	}
2089 
2090 	/** Get the list of tokens that are IDs like BLOCK and LPAREN */
2091 	public Set getTokenIDs() {
2092 		return composite.tokenIDToTypeMap.keySet();
2093 	}
2094 
2095 	/** Return an ordered integer list of token types that have no
2096 	 *  corresponding token ID like INT or KEYWORD_BEGIN; for stuff
2097 	 *  like 'begin'.
2098 	 */
2099 	public Collection getTokenTypesWithoutID() {
2100 		List types = new ArrayList();
2101 		for (int t =Label.MIN_TOKEN_TYPE; t<=getMaxTokenType(); t++) {
2102 			String name = getTokenDisplayName(t);
2103 			if ( name.charAt(0)=='\'' ) {
2104 				types.add(Utils.integer(t));
2105 			}
2106 		}
2107 		return types;
2108 	}
2109 
2110 	/** Get a list of all token IDs and literals that have an associated
2111 	 *  token type.
2112 	 */
2113 	public Set<String> getTokenDisplayNames() {
2114 		Set<String> names = new HashSet<String>();
2115 		for (int t =Label.MIN_TOKEN_TYPE; t <=getMaxTokenType(); t++) {
2116 			names.add(getTokenDisplayName(t));
2117 		}
2118 		return names;
2119 	}
2120 
2121 	/** Given a literal like (the 3 char sequence with single quotes) 'a',
2122 	 *  return the int value of 'a'. Convert escape sequences here also.
2123 	 *  ANTLR's antlr.g parser does not convert escape sequences.
2124 	 *
2125 	 *  11/26/2005: I changed literals to always be '...' even for strings.
2126 	 *  This routine still works though.
2127 	 */
2128 	public static int getCharValueFromGrammarCharLiteral(String literal) {
2129 		switch ( literal.length() ) {
2130 			case 3 :
2131 				// 'x'
2132 				return literal.charAt(1); // no escape char
2133 			case 4 :
2134 				// '\x'  (antlr lexer will catch invalid char)
2135 				if ( Character.isDigit(literal.charAt(2)) ) {
2136 					ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
2137 									   "invalid char literal: "+literal);
2138 					return -1;
2139 				}
2140 				int escChar = literal.charAt(2);
2141 				int charVal = ANTLRLiteralEscapedCharValue[escChar];
2142 				if ( charVal==0 ) {
2143 					// Unnecessary escapes like '\{' should just yield {
2144 					return escChar;
2145 				}
2146 				return charVal;
2147 			case 8 :
2148 				// '\u1234'
2149 				String unicodeChars = literal.substring(3,literal.length()-1);
2150 				return Integer.parseInt(unicodeChars, 16);
2151 			default :
2152 				ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
2153 								   "invalid char literal: "+literal);
2154 				return -1;
2155 		}
2156 	}
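	/* Worked examples for the cases above (assuming the usual entries in
	 * ANTLRLiteralEscapedCharValue):
	 *   'a'      (length 3) -> 97 ('a')
	 *   '\n'     (length 4) -> 10 (newline escape)
	 *   '\u0041' (length 8) -> 65 ('A'), parsed as hex "0041"
	 */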
2157 
2158 	/** ANTLR does not convert escape sequences during the parse phase because
2159 	 *  it could not know how to print String/char literals back out when
2160 	 *  printing grammars etc...  Someone in China might use the real unicode
2161 	 *  char in a literal as it will display on their screen; when printing
2162 	 *  back out, I could not know whether to display or use a unicode escape.
2163 	 *
2164 	 *  This routine converts a string literal with possible escape sequences
2165 	 *  into a pure string of 16-bit char values.  Escapes and unicode \u0000
2166 	 *  specs are converted to pure chars.  return in a buffer; people may
2167 	 *  want to walk/manipulate further.
2168 	 *
2169 	 *  The NFA construction routine must know the actual char values.
2170 	 */
2171 	public static StringBuffer getUnescapedStringFromGrammarStringLiteral(String literal) {
2172 		//System.out.println("escape: ["+literal+"]");
2173 		StringBuffer buf = new StringBuffer();
2174 		int last = literal.length()-1; // skip quotes on outside
2175 		for (int i=1; i<last; i++) {
2176 			char c = literal.charAt(i);
2177 			if ( c=='\\' ) {
2178 				i++;
2179 				c = literal.charAt(i);
2180 				if ( Character.toUpperCase(c)=='U' ) {
2181 					// \u0000
2182 					i++;
2183 					String unicodeChars = literal.substring(i,i+4);
2184 					// parse the unicode 16 bit hex value
2185 					int val = Integer.parseInt(unicodeChars, 16);
2186 					i+=4-1; // loop will inc by 1; only jump 3 then
2187 					buf.append((char)val);
2188 				}
2189 				else if ( Character.isDigit(c) ) {
2190 					ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,
2191 									   "invalid char literal: "+literal);
2192 					buf.append("\\"+(char)c);
2193 				}
2194 				else {
2195 					buf.append((char)ANTLRLiteralEscapedCharValue[c]); // normal \x escape
2196 				}
2197 			}
2198 			else {
2199 				buf.append(c); // simple char x
2200 			}
2201 		}
2202 		//System.out.println("string: ["+buf.toString()+"]");
2203 		return buf;
2204 	}
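	/* Sketch: the grammar literal 'a\nb' becomes the 3-char buffer
	 * {'a', 0x0A, 'b'}, and '\u0041' becomes the 1-char buffer {'A'};
	 * the outer quotes are skipped and escapes collapsed by the loop above.
	 */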
2205 
2206 	/** Pull your token definitions from an existing grammar in memory.
2207 	 *  You must call the Grammar() ctor, then this method, then setGrammarContent()
2208 	 *  to make this work.  This was useful primarily for testing and
2209 	 *  interpreting grammars until I added import grammar functionality.
2210 	 *  When you import a grammar you implicitly import its vocabulary as well
2211 	 *  and keep the same token type values.
2212 	 *
2213 	 *  Returns the max token type found.
2214 	 */
2215 	public int importTokenVocabulary(Grammar importFromGr) {
2216 		Set importedTokenIDs = importFromGr.getTokenIDs();
2217 		for (Iterator it = importedTokenIDs.iterator(); it.hasNext();) {
2218 			String tokenID = (String) it.next();
2219 			int tokenType = importFromGr.getTokenType(tokenID);
2220 			composite.maxTokenType = Math.max(composite.maxTokenType,tokenType);
2221 			if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
2222 				//System.out.println("import token from grammar "+tokenID+"="+tokenType);
2223 				defineToken(tokenID, tokenType);
2224 			}
2225 		}
2226 		return composite.maxTokenType; // return max found
2227 	}
2228 
2229 	/** Import the rules/tokens of a delegate grammar. All delegate grammars are
2230 	 *  read during the ctor of the first Grammar created.
2231 	 *
2232 	 *  Do not create NFA here because NFA construction needs to hook up with
2233 	 *  overridden rules in delegation root grammar.
2234 	 */
2235 	public void importGrammar(GrammarAST grammarNameAST, String label) {
2236 		String grammarName = grammarNameAST.getText();
2237 		//System.out.println("import "+gfile.getName());
2238 		String gname = grammarName + GRAMMAR_FILE_EXTENSION;
2239 		BufferedReader br = null;
2240 		try {
2241 			String fullName = tool.getLibraryFile(gname);
2242 			FileReader fr = new FileReader(fullName);
2243 			br = new BufferedReader(fr);
2244 			Grammar delegateGrammar = null;
2245 			delegateGrammar = new Grammar(tool, gname, composite);
2246 			delegateGrammar.label = label;
2247 
2248 			addDelegateGrammar(delegateGrammar);
2249 
2250 			delegateGrammar.parseAndBuildAST(br);
2251 			delegateGrammar.addRulesForSyntacticPredicates();
2252 			if ( !validImport(delegateGrammar) ) {
2253 				ErrorManager.grammarError(ErrorManager.MSG_INVALID_IMPORT,
2254 										  this,
2255 										  grammarNameAST.token,
2256 										  this,
2257 										  delegateGrammar);
2258 				return;
2259 			}
2260 			if ( this.type==COMBINED &&
2261 				 (delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[LEXER])||
2262 				  delegateGrammar.name.equals(this.name+grammarTypeToFileNameSuffix[PARSER])) )
2263 			{
2264 				ErrorManager.grammarError(ErrorManager.MSG_IMPORT_NAME_CLASH,
2265 										  this,
2266 										  grammarNameAST.token,
2267 										  this,
2268 										  delegateGrammar);
2269 				return;
2270 			}
2271 			if ( delegateGrammar.grammarTree!=null ) {
2272 				// we have a valid grammar
2273 				// deal with combined grammars
2274 				if ( delegateGrammar.type == LEXER && this.type == COMBINED ) {
2275 					// ooops, we wasted some effort; tell lexer to read it in
2276 					// later
2277 					lexerGrammarST.add("imports", grammarName);
2278 					// but, this parser grammar will need the vocab
2279 					// so add to composite anyway so we suck in the tokens later
2280 				}
2281 			}
2282 			//System.out.println("Got grammar:\n"+delegateGrammar);
2283 		}
2284 		catch (IOException ioe) {
2285 			ErrorManager.error(ErrorManager.MSG_CANNOT_OPEN_FILE,
2286 							   gname,
2287 							   ioe);
2288 		}
2289 		finally {
2290 			if ( br!=null ) {
2291 				try {
2292 					br.close();
2293 				}
2294 				catch (IOException ioe) {
2295 					ErrorManager.error(ErrorManager.MSG_CANNOT_CLOSE_FILE,
2296 									   gname,
2297 									   ioe);
2298 				}
2299 			}
2300 		}
2301 	}
2302 
2303 	/** add new delegate to composite tree */
2304 	protected void addDelegateGrammar(Grammar delegateGrammar) {
2305 		CompositeGrammarTree t = composite.delegateGrammarTreeRoot.findNode(this);
2306 		t.addChild(new CompositeGrammarTree(delegateGrammar));
2307 		// make sure new grammar shares this composite
2308 		delegateGrammar.composite = this.composite;
2309 	}
2310 
2311 	/** Load a vocab file <vocabName>.tokens and return max token type found. */
2312 	public int importTokenVocabulary(GrammarAST tokenVocabOptionAST,
2313 									 String vocabName)
2314 	{
2315 		if ( !getGrammarIsRoot() ) {
2316 			ErrorManager.grammarWarning(ErrorManager.MSG_TOKEN_VOCAB_IN_DELEGATE,
2317 										this,
2318 										tokenVocabOptionAST.token,
2319 										name);
2320 			return composite.maxTokenType;
2321 		}
2322 
2323 		File fullFile = tool.getImportedVocabFile(vocabName);
2324 		try {
2325 			FileReader fr = new FileReader(fullFile);
2326 			BufferedReader br = new BufferedReader(fr);
2327 			StreamTokenizer tokenizer = new StreamTokenizer(br);
2328 			tokenizer.parseNumbers();
2329 			tokenizer.wordChars('_', '_');
2330 			tokenizer.eolIsSignificant(true);
2331 			tokenizer.slashSlashComments(true);
2332 			tokenizer.slashStarComments(true);
2333 			tokenizer.ordinaryChar('=');
2334 			tokenizer.quoteChar('\'');
2335 			tokenizer.whitespaceChars(' ',' ');
2336 			tokenizer.whitespaceChars('\t','\t');
2337 			int lineNum = 1;
2338 			int token = tokenizer.nextToken();
2339 			while (token != StreamTokenizer.TT_EOF) {
2340 				String tokenID;
2341 				if ( token == StreamTokenizer.TT_WORD ) {
2342 					tokenID = tokenizer.sval;
2343 				}
2344 				else if ( token == '\'' ) {
2345 					tokenID = "'"+tokenizer.sval+"'";
2346 				}
2347 				else {
2348 					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
2349 									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
2350 									   Utils.integer(lineNum));
2351 					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
2352 					token = tokenizer.nextToken();
2353 					continue;
2354 				}
2355 				token = tokenizer.nextToken();
2356 				if ( token != '=' ) {
2357 					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
2358 									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
2359 									   Utils.integer(lineNum));
2360 					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
2361 					token = tokenizer.nextToken();
2362 					continue;
2363 				}
2364 				token = tokenizer.nextToken(); // skip '='
2365 				if ( token != StreamTokenizer.TT_NUMBER ) {
2366 					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
2367 									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
2368 									   Utils.integer(lineNum));
2369 					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
2370 					token = tokenizer.nextToken();
2371 					continue;
2372 				}
2373 				int tokenType = (int)tokenizer.nval;
2374 				token = tokenizer.nextToken();
2375 				//System.out.println("import "+tokenID+"="+tokenType);
2376 				composite.maxTokenType = Math.max(composite.maxTokenType,tokenType);
2377 				defineToken(tokenID, tokenType);
2378 				lineNum++;
2379 				if ( token != StreamTokenizer.TT_EOL ) {
2380 					ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
2381 									   vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
2382 									   Utils.integer(lineNum));
2383 					while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {;}
2384 					token = tokenizer.nextToken();
2385 					continue;
2386 				}
2387 				token = tokenizer.nextToken(); // skip newline
2388 			}
2389 			br.close();
2390 		}
2391 		catch (FileNotFoundException fnfe) {
2392 			ErrorManager.error(ErrorManager.MSG_CANNOT_FIND_TOKENS_FILE,
2393 							   fullFile);
2394 		}
2395 		catch (IOException ioe) {
2396 			ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
2397 							   fullFile,
2398 							   ioe);
2399 		}
2400 		catch (Exception e) {
2401 			ErrorManager.error(ErrorManager.MSG_ERROR_READING_TOKENS_FILE,
2402 							   fullFile,
2403 							   e);
2404 		}
2405 		return composite.maxTokenType;
2406 	}
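	/* The vocab file accepted above is one tokenID=type pair per line; a
	 * hypothetical T.tokens might read:
	 *
	 *   ID=4
	 *   INT=5
	 *   'begin'=6
	 *
	 * A word or quoted literal, then '=', a number, and end-of-line, as
	 * enforced by the StreamTokenizer setup and error checks above.
	 */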
2407 
2408 	/** Given a token type, get a meaningful name for it such as the ID
2409 	 *  or string literal.  If this is a lexer and the ttype is in the
2410 	 *  char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
2411 	 */
2412 	public String getTokenDisplayName(int ttype) {
2413 		String tokenName = null;
2414 		int index=0;
2415 		// inside any target's char range and is lexer grammar?
2416 		if ( this.type==LEXER &&
2417 			 ttype >= Label.MIN_CHAR_VALUE && ttype <= Label.MAX_CHAR_VALUE )
2418 		{
2419 			return getANTLRCharLiteralForChar(ttype);
2420 		}
2421 		// faux label?
2422 		else if ( ttype<0 ) {
2423 			tokenName = (String)composite.typeToTokenList.get(Label.NUM_FAUX_LABELS+ttype);
2424 		}
2425 		else {
2426 			// compute index in typeToTokenList for ttype
2427 			index = ttype-1; // normalize to 0..n-1
2428 			index += Label.NUM_FAUX_LABELS;     // jump over faux tokens
2429 
2430 			if ( index<composite.typeToTokenList.size() ) {
2431 				tokenName = (String)composite.typeToTokenList.get(index);
2432 				if ( tokenName!=null &&
2433 					 tokenName.startsWith(AUTO_GENERATED_TOKEN_NAME_PREFIX) )
2434 				{
2435 					tokenName = composite.typeToStringLiteralList.get(ttype);
2436 				}
2437 			}
2438 			else {
2439 				tokenName = String.valueOf(ttype);
2440 			}
2441 		}
2442 		//System.out.println("getTokenDisplayName ttype="+ttype+", index="+index+", name="+tokenName);
2443 		return tokenName;
2444 	}
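	/* For instance (illustrative type values): in a parser grammar, a type
	 * mapped to "ID" displays as "ID"; an auto-generated token name falls
	 * back to its literal, e.g. "'begin'"; in a lexer grammar, a ttype of 97
	 * displays as the char literal "'a'".
	 */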
2445 
2446 	/** Get the list of ANTLR String literals */
2447 	public Set<String> getStringLiterals() {
2448 		return composite.stringLiteralToTypeMap.keySet();
2449 	}
2450 
2451 	public String getGrammarTypeString() {
2452 		return grammarTypeToString[type];
2453 	}
2454 
2455 	public int getGrammarMaxLookahead() {
2456 		if ( global_k>=0 ) {
2457 			return global_k;
2458 		}
2459 		Object k = getOption("k");
2460 		if ( k==null ) {
2461 			global_k = 0;
2462 		}
2463 		else if (k instanceof Integer) {
2464 			Integer kI = (Integer)k;
2465 			global_k = kI.intValue();
2466 		}
2467 		else {
2468 			// must be String "*"
2469 			if ( k.equals("*") ) {  // this is the default anyway
2470 				global_k = 0;
2471 			}
2472 		}
2473 		return global_k;
2474 	}
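	/* e.g. a grammar-level options {k=2;} yields 2 here; k="*" or an absent
	 * k option yields 0, i.e. no user-imposed fixed-k bound (the default).
	 */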
2475 
2476 	/** Save the option key/value pair and process it; return the key
2477 	 *  or null if invalid option.
2478 	 */
2479 	public String setOption(String key, Object value, Token optionsStartToken) {
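		// Note: despite its name, legalOption(key) returns true when the key
		// is NOT legal for this grammar type (see its definition below), so
		// this branch reports MSG_ILLEGAL_OPTION.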
2480 		if ( legalOption(key) ) {
2481 			ErrorManager.grammarError(ErrorManager.MSG_ILLEGAL_OPTION,
2482 									  this,
2483 									  optionsStartToken,
2484 									  key);
2485 			return null;
2486 		}
2487 		if ( !optionIsValid(key, value) ) {
2488 			return null;
2489 		}
2490         if ( key.equals("backtrack") && value.toString().equals("true") ) {
2491             composite.getRootGrammar().atLeastOneBacktrackOption = true;
2492         }
2493         if ( options==null ) {
2494 			options = new HashMap();
2495 		}
2496 		options.put(key, value);
2497 		return key;
2498 	}
2499 
2500 	public boolean legalOption(String key) {
2501 		switch ( type ) {
2502 			case LEXER :
2503 				return !legalLexerOptions.contains(key);
2504 			case PARSER :
2505 				return !legalParserOptions.contains(key);
2506 			case TREE_PARSER :
2507 				return !legalTreeParserOptions.contains(key);
2508 			default :
2509 				return !legalParserOptions.contains(key);
2510 		}
2511 	}
2512 
2513 	public void setOptions(Map options, Token optionsStartToken) {
2514 		if ( options==null ) {
2515 			this.options = null;
2516 			return;
2517 		}
2518 		Set keys = options.keySet();
2519 		for (Iterator it = keys.iterator(); it.hasNext();) {
2520 			String optionName = (String) it.next();
2521 			Object optionValue = options.get(optionName);
2522 			String stored=setOption(optionName, optionValue, optionsStartToken);
2523 			if ( stored==null ) {
2524 				it.remove();
2525 			}
2526 		}
2527 	}
2528 
2529 	public Object getOption(String key) {
2530 		return composite.getOption(key);
2531 	}
2532 
2533 	public Object getLocallyDefinedOption(String key) {
2534 		Object value = null;
2535 		if ( options!=null ) {
2536 			value = options.get(key);
2537 		}
2538 		if ( value==null ) {
2539 			value = defaultOptions.get(key);
2540 		}
2541 		return value;
2542 	}
2543 
2544 	public Object getBlockOption(GrammarAST blockAST, String key) {
2545 		String v = (String)blockAST.getBlockOption(key);
2546 		if ( v!=null ) {
2547 			return v;
2548 		}
2549 		if ( type==Grammar.LEXER ) {
2550 			return defaultLexerBlockOptions.get(key);
2551 		}
2552 		return defaultBlockOptions.get(key);
2553 	}
2554 
2555 	public int getUserMaxLookahead(int decision) {
2556 		int user_k = 0;
2557 		GrammarAST blockAST = nfa.grammar.getDecisionBlockAST(decision);
2558 		Object k = blockAST.getBlockOption("k");
2559 		if ( k==null ) {
2560 			user_k = nfa.grammar.getGrammarMaxLookahead();
2561 			return user_k;
2562 		}
2563 		if (k instanceof Integer) {
2564 			Integer kI = (Integer)k;
2565 			user_k = kI.intValue();
2566 		}
2567 		else {
2568 			// must be String "*"
2569 			if ( k.equals("*") ) {
2570 				user_k = 0;
2571 			}
2572 		}
2573 		return user_k;
2574 	}
2575 
2576 	public boolean getAutoBacktrackMode(int decision) {
2577 		NFAState decisionNFAStartState = getDecisionNFAStartState(decision);
2578 		String autoBacktrack =
2579 			(String)getBlockOption(decisionNFAStartState.associatedASTNode, "backtrack");
2580 
2581 		if ( autoBacktrack==null ) {
2582 			autoBacktrack = (String)nfa.grammar.getOption("backtrack");
2583 		}
2584 		return autoBacktrack!=null&&autoBacktrack.equals("true");
2585 	}
2586 
2587 	public boolean optionIsValid(String key, Object value) {
2588 		return true;
2589 	}
2590 
2591 	public boolean buildAST() {
2592 		String outputType = (String)getOption("output");
2593 		if ( outputType!=null ) {
2594 			return outputType.toString().equals("AST");
2595 		}
2596 		return false;
2597 	}
2598 
2599 	public boolean rewriteMode() {
2600 		Object outputType = getOption("rewrite");
2601 		if ( outputType!=null ) {
2602 			return outputType.toString().equals("true");
2603 		}
2604 		return false;
2605 	}
2606 
2607 	public boolean isBuiltFromString() {
2608 		return builtFromString;
2609 	}
2610 
2611 	public boolean buildTemplate() {
2612 		String outputType = (String)getOption("output");
2613 		if ( outputType!=null ) {
2614 			return outputType.toString().equals("template");
2615 		}
2616 		return false;
2617 	}
2618 
2619 	public Collection<Rule> getRules() {
2620 		return nameToRuleMap.values();
2621 	}
2622 
2623 	/** Get the set of Rules that need to have manual delegations
2624 	 *  like "void rule() { importedGrammar.rule(); }"
2625 	 *
2626 	 *  If this grammar is master, get list of all rule definitions from all
2627 	 *  delegate grammars.  Only master has complete interface from combined
2628 	 *  grammars...we will generate delegates as helper objects.
2629 	 *
2630 	 *  Composite grammars that are not the root/master do not have complete
2631 	 *  interfaces.  It is not my intention that people use subcomposites.
2632 	 *  Only the outermost grammar should be used from outside code.  The
2633 	 *  other grammar components are specifically generated to work only
2634 	 *  with the master/root.
2635 	 *
2636 	 *  delegatedRules = imported - overridden
2637 	 */
2638 	public Set<Rule> getDelegatedRules() {
2639 		return composite.getDelegatedRules(this);
2640 	}
2641 
2642 	/** Get set of all rules imported from all delegate grammars even if
2643 	 *  indirectly delegated.
2644 	 */
2645 	public Set<Rule> getAllImportedRules() {
2646 		return composite.getAllImportedRules(this);
2647 	}
2648 
2649 	/** Get list of all delegates from all grammars directly or indirectly
2650 	 *  imported into this grammar.
2651 	 */
2652 	public List<Grammar> getDelegates() {
2653 		return composite.getDelegates(this);
2654 	}
2655 
2656 	public boolean getHasDelegates() {
2657 	   return !getDelegates().isEmpty();
2658 	}
2659 
2660 	public List<String> getDelegateNames() {
2661 		// compute delegates:{Grammar g | return g.name;}
2662 		List<String> names = new ArrayList<String>();
2663 		List<Grammar> delegates = composite.getDelegates(this);
2664 		if ( delegates!=null ) {
2665 			for (Grammar g : delegates) {
2666 				names.add(g.name);
2667 			}
2668 		}
2669 		return names;
2670 	}
2671 
2672 	public List<Grammar> getDirectDelegates() {
2673 		return composite.getDirectDelegates(this);
2674 	}
2675 
2676 	/** Get delegates below direct delegates */
2677 	public List<Grammar> getIndirectDelegates() {
2678 		return composite.getIndirectDelegates(this);
2679 	}
2680 
2681 	/** Get list of all delegators.  This amounts to the grammars on the path
2682 	 *  to the root of the delegation tree.
2683 	 */
2684 	public List<Grammar> getDelegators() {
2685 		return composite.getDelegators(this);
2686 	}
2687 
2688 	/** Who's my direct parent grammar? */
2689 	public Grammar getDelegator() {
2690 		return composite.getDelegator(this);
2691 	}
2692 
2693 	public Set<Rule> getDelegatedRuleReferences() {
2694 		return delegatedRuleReferences;
2695 	}
2696 
2697 	public boolean getGrammarIsRoot() {
2698 		return composite.delegateGrammarTreeRoot.grammar == this;
2699 	}
2700 
2701 	public void setRuleAST(String ruleName, GrammarAST t) {
2702 		Rule r = getLocallyDefinedRule(ruleName);
2703 		if ( r!=null ) {
2704 			r.tree = t;
2705 			r.EORNode = t.getLastChild();
2706 		}
2707 	}
2708 
2709 	public NFAState getRuleStartState(String ruleName) {
2710 		return getRuleStartState(null, ruleName);
2711 	}
2712 
2713 	public NFAState getRuleStartState(String scopeName, String ruleName) {
2714 		Rule r = getRule(scopeName, ruleName);
2715 		if ( r!=null ) {
2716 			//System.out.println("getRuleStartState("+scopeName+", "+ruleName+")="+r.startState);
2717 			return r.startState;
2718 		}
2719 		//System.out.println("getRuleStartState("+scopeName+", "+ruleName+")=null");
2720 		return null;
2721 	}
2722 
2723 	public String getRuleModifier(String ruleName) {
2724 		Rule r = getRule(ruleName);
2725 		if ( r!=null ) {
2726 			return r.modifier;
2727 		}
2728 		return null;
2729 	}
2730 
2731 	public NFAState getRuleStopState(String ruleName) {
2732 		Rule r = getRule(ruleName);
2733 		if ( r!=null ) {
2734 			return r.stopState;
2735 		}
2736 		return null;
2737 	}
2738 
2739 	public int assignDecisionNumber(NFAState state) {
2740 		decisionCount++;
2741 		state.setDecisionNumber(decisionCount);
2742 		return decisionCount;
2743 	}
2744 
2745 	protected Decision getDecision(int decision) {
2746 		int index = decision-1;
2747 		if ( index >= indexToDecision.size() ) {
2748 			return null;
2749 		}
2750 		Decision d = indexToDecision.get(index);
2751 		return d;
2752 	}
2753 
2754 	public List<Decision> getDecisions() {
2755 		return indexToDecision;
2756 	}
2757 
2758 	protected Decision createDecision(int decision) {
2759 		int index = decision-1;
2760 		if ( index < indexToDecision.size() ) {
2761 			return getDecision(decision); // don't recreate
2762 		}
2763 		Decision d = new Decision();
2764 		d.decision = decision;
2765 		d.grammar = this;
2766 		indexToDecision.setSize(getNumberOfDecisions());
2767 		indexToDecision.set(index, d);
2768 		return d;
2769 	}
2770 
2771 	public List getDecisionNFAStartStateList() {
2772 		List states = new ArrayList(100);
2773 		for (int d = 0; d < indexToDecision.size(); d++) {
2774 			Decision dec = (Decision) indexToDecision.get(d);
2775 			states.add(dec.startState);
2776 		}
2777 		return states;
2778 	}
2779 
2780 	public NFAState getDecisionNFAStartState(int decision) {
2781 		Decision d = getDecision(decision);
2782 		if ( d==null ) {
2783 			return null;
2784 		}
2785 		return d.startState;
2786 	}
2787 
2788 	public DFA getLookaheadDFA(int decision) {
2789 		Decision d = getDecision(decision);
2790 		if ( d==null ) {
2791 			return null;
2792 		}
2793 		return d.dfa;
2794 	}
2795 
2796 	public GrammarAST getDecisionBlockAST(int decision) {
2797 		Decision d = getDecision(decision);
2798 		if ( d==null ) {
2799 			return null;
2800 		}
2801 		return d.blockAST;
2802 	}
2803 
2804 	/** returns a list of column numbers for all decisions
2805 	 *  on a particular line so ANTLRWorks can choose the decision
2806 	 *  depending on the location of the cursor (otherwise,
2807 	 *  ANTLRWorks has to give the *exact* location, which
2808 	 *  is not easy from the user's point of view).
2809 	 *
2810 	 *  This is not particularly fast as it walks the entire line:col->DFA map
2811 	 *  looking for a prefix of "line:".
2812 	 */
2813 	public List getLookaheadDFAColumnsForLineInFile(int line) {
2814 		String prefix = line+":";
2815 		List columns = new ArrayList();
2816 		for(Iterator iter = lineColumnToLookaheadDFAMap.keySet().iterator();
2817 			iter.hasNext(); ) {
2818 			String key = (String)iter.next();
2819 			if(key.startsWith(prefix)) {
2820 				columns.add(Integer.valueOf(key.substring(prefix.length())));
2821 			}
2822 		}
2823 		return columns;
2824 	}
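	/* e.g. if lineColumnToLookaheadDFAMap holds keys "12:5" and "12:30",
	 * then getLookaheadDFAColumnsForLineInFile(12) returns [5, 30] (in the
	 * map's iteration order).
	 */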
2825 
2826 	/** Useful for ANTLRWorks to map position in file to the DFA for display */
2827 	public DFA getLookaheadDFAFromPositionInFile(int line, int col) {
2828 		return (DFA)lineColumnToLookaheadDFAMap.get(
2829 			new StringBuffer().append(line + ":").append(col).toString());
2830 	}
2831 
2832 	public Map getLineColumnToLookaheadDFAMap() {
2833 		return lineColumnToLookaheadDFAMap;
2834 	}
2835 
2836 	/*
2837 	public void setDecisionOptions(int decision, Map options) {
2838 		Decision d = createDecision(decision);
2839 		d.options = options;
2840 	}
2841 
2842 	public void setDecisionOption(int decision, String name, Object value) {
2843 		Decision d = getDecision(decision);
2844 		if ( d!=null ) {
2845 			if ( d.options==null ) {
2846 				d.options = new HashMap();
2847 			}
2848 			d.options.put(name,value);
2849 		}
2850 	}
2851 
2852 	public Map getDecisionOptions(int decision) {
2853 		Decision d = getDecision(decision);
2854 		if ( d==null ) {
2855 			return null;
2856 		}
2857 		return d.options;
2858     }
2859     */
2860 
2861 	public int getNumberOfDecisions() {
2862 		return decisionCount;
2863 	}
2864 
2865 	public int getNumberOfCyclicDecisions() {
2866 		int n = 0;
2867 		for (int i=1; i<=getNumberOfDecisions(); i++) {
2868 			Decision d = getDecision(i);
2869 			if ( d.dfa!=null && d.dfa.isCyclic() ) {
2870 				n++;
2871 			}
2872 		}
2873 		return n;
2874 	}
2875 
2876 	/** Set the lookahead DFA for a particular decision.  This means
2877 	 *  that the appropriate AST node must be updated to have the new lookahead
2878 	 *  DFA.  This method could be used to properly set the DFAs without
2879 	 *  using the createLookaheadDFAs() method.  You could do this
2880 	 *
2881 	 *    Grammar g = new Grammar("...");
2882 	 *    g.setLookahead(1, dfa1);
2883 	 *    g.setLookahead(2, dfa2);
2884 	 *    ...
2885 	 */
2886 	public void setLookaheadDFA(int decision, DFA lookaheadDFA) {
2887 		Decision d = createDecision(decision);
2888 		d.dfa = lookaheadDFA;
2889 		GrammarAST ast = d.startState.associatedASTNode;
2890 		ast.setLookaheadDFA(lookaheadDFA);
2891 	}
2892 
2893 	public void setDecisionNFA(int decision, NFAState state) {
2894 		Decision d = createDecision(decision);
2895 		d.startState = state;
2896 	}
2897 
setDecisionBlockAST(int decision, GrammarAST blockAST)2898 	public void setDecisionBlockAST(int decision, GrammarAST blockAST) {
2899 		//System.out.println("setDecisionBlockAST("+decision+", "+blockAST.token);
2900 		Decision d = createDecision(decision);
2901 		d.blockAST = blockAST;
2902 	}
2903 
allDecisionDFAHaveBeenCreated()2904 	public boolean allDecisionDFAHaveBeenCreated() {
2905 		return allDecisionDFACreated;
2906 	}
2907 
2908 	/** How many token types have been allocated so far? */
getMaxTokenType()2909 	public int getMaxTokenType() {
2910 		return composite.maxTokenType;
2911 	}
2912 
2913 	/** What is the max char value possible for this grammar's target?  Use
2914 	 *  unicode max if no target defined.
2915 	 */
getMaxCharValue()2916 	public int getMaxCharValue() {
2917 		if ( generator!=null ) {
2918 			return generator.target.getMaxCharValue(generator);
2919 		}
2920 		else {
2921 			return Label.MAX_CHAR_VALUE;
2922 		}
2923 	}
2924 
2925 	/** Return a set of all possible token or char types for this grammar */
getTokenTypes()2926 	public IntSet getTokenTypes() {
2927 		if ( type==LEXER ) {
2928 			return getAllCharValues();
2929 		}
2930 		return IntervalSet.of(Label.MIN_TOKEN_TYPE, getMaxTokenType());
2931 	}
2932 
2933 	/** If there is a char vocabulary, use it; else return min to max char
2934 	 *  as defined by the target.  If no target, use max unicode char value.
2935 	 */
getAllCharValues()2936 	public IntSet getAllCharValues() {
2937 		if ( charVocabulary!=null ) {
2938 			return charVocabulary;
2939 		}
2940 		IntSet allChar = IntervalSet.of(Label.MIN_CHAR_VALUE, getMaxCharValue());
2941 		return allChar;
2942 	}
2943 
	/** Return a string representing the escaped char for code c.  E.g., if c
	 *  has value 0x100, you will get "\u0100".  ASCII gets the usual
	 *  char (non-hex) representation.  Control characters are spit out
	 *  as unicode.  While this is specially set up for returning Java strings,
	 *  it can be used by any language target that has the same syntax. :)
	 *
	 *  11/26/2005: I changed this to use double quotes, consistent with antlr.g
	 *  12/09/2005: I changed so everything is single quotes
	 */
	public static String getANTLRCharLiteralForChar(int c) {
		if ( c<Label.MIN_CHAR_VALUE ) {
			ErrorManager.internalError("invalid char value "+c);
			return "'<INVALID>'";
		}
		if ( c<ANTLRLiteralCharValueEscape.length && ANTLRLiteralCharValueEscape[c]!=null ) {
			return '\''+ANTLRLiteralCharValueEscape[c]+'\'';
		}
		if ( Character.UnicodeBlock.of((char)c)==Character.UnicodeBlock.BASIC_LATIN &&
			 !Character.isISOControl((char)c) ) {
			if ( c=='\\' ) {
				return "'\\\\'";
			}
			if ( c=='\'' ) {
				return "'\\''";
			}
			return '\''+Character.toString((char)c)+'\'';
		}
		// turn on the bit above max "\uFFFF" value so that we pad with zeros
		// then only take last 4 digits
		String hex = Integer.toHexString(c|0x10000).toUpperCase().substring(1,5);
		String unicodeStr = "'\\u"+hex+"'";
		return unicodeStr;
	}
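
	// Worked examples (illustrative, not part of the original source).  The
	// c|0x10000 trick: OR-ing in bit 16 forces toHexString to produce five hex
	// digits, so substring(1,5) always yields a zero-padded four-digit escape.
	//
	//   getANTLRCharLiteralForChar('a')    -> 'a'    (printable BASIC_LATIN)
	//   getANTLRCharLiteralForChar('\'')   -> '\''   (the quote is escaped)
	//   getANTLRCharLiteralForChar(0x100)  -> a quoted \u0100 escape, since
	//       0x100|0x10000 == 0x10100, toHexString gives "10100", and
	//       substring(1,5) keeps the zero-padded "0100".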

	/** For lexer grammars, return everything in unicode not in set.
	 *  For parser and tree grammars, return everything in token space
	 *  from MIN_TOKEN_TYPE to last valid token type or char value.
	 */
	public IntSet complement(IntSet set) {
		//System.out.println("complement "+set.toString(this));
		//System.out.println("vocabulary "+getTokenTypes().toString(this));
		IntSet c = set.complement(getTokenTypes());
		//System.out.println("result="+c.toString(this));
		return c;
	}

	public IntSet complement(int atom) {
		return complement(IntervalSet.of(atom));
	}
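
	// Usage sketch (illustrative only): in a lexer grammar, ~'a' is computed by
	// complementing the single-char set against the full char vocabulary; in a
	// parser it is complemented against the token-type range.  `g` is an
	// assumed Grammar instance.
	//
	//   IntSet notA  = g.complement('a');                    // all chars but 'a'
	//   IntSet notAB = g.complement(IntervalSet.of('a','b')); // all but 'a'..'b'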

	/** Given a set element tree like ( SET A B ), check that A and B
	 *  are both valid sets themselves, else we must treat the subtree
	 *  like a BLOCK.
	 */
	public boolean isValidSet(TreeToNFAConverter nfabuilder, GrammarAST t) {
		boolean valid = true;
		try {
			//System.out.println("parse BLOCK as set tree: "+t.toStringTree());
			int alts = nfabuilder.testBlockAsSet(t);
			valid = alts > 1;
		}
		catch (RecognitionException re) {
			// the block did not parse as a set; ignore the exception
			valid = false;
		}
		//System.out.println("valid? "+valid);
		return valid;
	}

	/** Get the set equivalent (if any) of the indicated rule from this
	 *  grammar.  Mostly used in the lexer to do ~T for some fragment rule
	 *  T.  If the rule AST has a SET, use that.  If the rule is a single char,
	 *  convert it to a set and return it.  If the rule is not a simple set
	 *  (w/o actions), then return null.
	 *  Rules have AST form:
	 *
	 *		^( RULE ID modifier ARG RET SCOPE block EOR )
	 */
	public IntSet getSetFromRule(TreeToNFAConverter nfabuilder, String ruleName)
		throws RecognitionException
	{
		Rule r = getRule(ruleName);
		if ( r==null ) {
			return null;
		}
		IntSet elements = null;
		//System.out.println("parsed tree: "+r.tree.toStringTree());
		elements = nfabuilder.setRule(r.tree);
		//System.out.println("elements="+elements);
		return elements;
	}
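
	// Illustration (not part of the original source): a fragment rule that is a
	// pure alternation of chars/ranges, e.g.
	//
	//   fragment HEXDIGIT : '0'..'9' | 'a'..'f' | 'A'..'F' ;
	//
	// can collapse to the set {0-9, a-f, A-F}, so ~HEXDIGIT elsewhere becomes
	// complement(getSetFromRule(nfabuilder, "HEXDIGIT")).  A rule containing
	// actions or non-set structure yields null and cannot be complemented.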

	/** Decisions are linked together with transition(1).  Count how
	 *  many there are.  This is here rather than in NFAState because
	 *  a grammar decides how NFAs are put together to form a decision.
	 */
	public int getNumberOfAltsForDecisionNFA(NFAState decisionState) {
		if ( decisionState==null ) {
			return 0;
		}
		int n = 1;
		NFAState p = decisionState;
		while ( p.transition[1]!=null ) {
			n++;
			p = (NFAState)p.transition[1].target;
		}
		return n;
	}
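
	// Worked example (illustrative only): for a three-alt decision (A|B|C) the
	// alt start states are chained via transition[1], so the walk visits
	//
	//   alt1 --t[1]--> alt2 --t[1]--> alt3   (transition[1]==null stops the loop)
	//
	// starting n at 1 and incrementing twice, returning 3.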

	/** Get the ith alternative (1..n) from a decision; return null when
	 *  an invalid alt is requested.  I must count in to find the right
	 *  alternative number.  For (A|B), you get NFA structure (roughly):
	 *
	 *  o->o-A->o
	 *  |
	 *  o->o-B->o
	 *
	 *  This routine returns the leftmost state for each alt.  So alt=1 returns
	 *  the upper-left-most state in this structure.
	 */
	public NFAState getNFAStateForAltOfDecision(NFAState decisionState, int alt) {
		if ( decisionState==null || alt<=0 ) {
			return null;
		}
		int n = 1;
		NFAState p = decisionState;
		while ( p!=null ) {
			if ( n==alt ) {
				return p;
			}
			n++;
			Transition next = p.transition[1];
			p = null;
			if ( next!=null ) {
				p = (NFAState)next.target;
			}
		}
		return null;
	}

	/*
	public void computeRuleFOLLOWSets() {
		if ( getNumberOfDecisions()==0 ) {
			createNFAs();
		}
		for (Iterator it = getRules().iterator(); it.hasNext();) {
			Rule r = (Rule)it.next();
			if ( r.isSynPred ) {
				continue;
			}
			LookaheadSet s = ll1Analyzer.FOLLOW(r);
			System.out.println("FOLLOW("+r.name+")="+s);
		}
	}
	*/

	/** Compute FIRST for NFA state s; delegates to the LL(1) analyzer. */
	public LookaheadSet FIRST(NFAState s) {
		return ll1Analyzer.FIRST(s);
	}

	/** Compute LOOK for NFA state s; delegates to the LL(1) analyzer. */
	public LookaheadSet LOOK(NFAState s) {
		return ll1Analyzer.LOOK(s);
	}
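
	// Usage sketch (illustrative, not part of the original source): ask what can
	// start a rule by pointing FIRST at the rule's start state.  The lookup via
	// getRuleStartState is an assumption about this class's wider API, shown
	// only to make the sketch concrete.
	//
	//   NFAState start = g.getRuleStartState("expr");
	//   LookaheadSet first = g.FIRST(start);   // tokens that can begin expr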

	public void setCodeGenerator(CodeGenerator generator) {
		this.generator = generator;
	}

	public CodeGenerator getCodeGenerator() {
		return generator;
	}

	public GrammarAST getGrammarTree() {
		return grammarTree;
	}

	public void setGrammarTree(GrammarAST value) {
		grammarTree = value;
	}

	public Tool getTool() {
		return tool;
	}

	public void setTool(Tool tool) {
		this.tool = tool;
	}

	/** Given a token type and the text of the literal, come up with a
	 *  decent token type label.  For now it's just T<type>; an aliased name
	 *  from the tokens section (like PLUS='+') would be preferable, but this
	 *  method does not consult it.
	 */
	public String computeTokenNameFromLiteral(int tokenType, String literal) {
		return AUTO_GENERATED_TOKEN_NAME_PREFIX+tokenType;
	}
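
	// Example (illustrative only): with tokenType 5 this yields the name
	// AUTO_GENERATED_TOKEN_NAME_PREFIX + "5"; the `literal` argument ("'+'",
	// say) is currently ignored, per the note above.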

	public String toString() {
	//	return "FFFFFFFFFFFFFF";
		return grammarTreeToString(grammarTree);
	}

	public String grammarTreeToString(GrammarAST t) {
		return grammarTreeToString(t, true);
	}

	public String grammarTreeToString(GrammarAST t, boolean showActions) {
		String s = null;
		try {
			s = t.getLine()+":"+(t.getCharPositionInLine()+1)+": ";
			s += new ANTLRTreePrinter(new CommonTreeNodeStream(t)).toString(this, showActions);
		}
		catch (Exception e) {
			s = "<invalid or missing tree structure>";
		}
		return s;
	}

	public void printGrammar(PrintStream output) {
		ANTLRTreePrinter printer = new ANTLRTreePrinter(new CommonTreeNodeStream(getGrammarTree()));
		try {
			String g = printer.toString(this, false);
			output.println(g);
		}
		catch (RecognitionException re) {
			ErrorManager.error(ErrorManager.MSG_SYNTAX_ERROR,re);
		}
	}
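
	// Usage sketch (illustrative only): after the ANTLR Tool has built a
	// Grammar `g`, the grammar can be pretty-printed without embedded actions:
	//
	//   g.printGrammar(System.out);          // actions suppressed
	//   String withActions = g.toString();   // actions included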

}