• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.tool;
29 
30 import org.antlr.analysis.Label;
31 import org.antlr.grammar.v3.AssignTokenTypesWalker;
32 import org.antlr.misc.Utils;
33 
34 import java.util.*;
35 
36 /** Move all of the functionality from assign.types.g grammar file. */
37 public class AssignTokenTypesBehavior extends AssignTokenTypesWalker {
38 	protected static final Integer UNASSIGNED = Utils.integer(-1);
39 	protected static final Integer UNASSIGNED_IN_PARSER_RULE = Utils.integer(-2);
40 
41 	protected Map<String,Integer> stringLiterals = new TreeMap<String, Integer>();
42 	protected Map<String,Integer> tokens = new TreeMap<String, Integer>();
43 	protected Map<String,String> aliases = new TreeMap<String, String>();
44 	protected Map<String,String> aliasesReverseIndex = new HashMap<String,String>();
45 
46 	/** Track actual lexer rule defs so we don't get repeated token defs in
47 	 *  generated lexer.
48 	 */
49 	protected Set<String> tokenRuleDefs = new HashSet<String>();
50 
AssignTokenTypesBehavior()51 	public AssignTokenTypesBehavior() {
52 		super(null);
53 	}
54 
55     @Override
init(Grammar g)56 	protected void init(Grammar g) {
57 		this.grammar = g;
58 		currentRuleName = null;
59 		if ( stringAlias==null ) {
60 			// only init once; can't statically init since we need astFactory
61 			initASTPatterns();
62 		}
63 	}
64 
65 	/** Track string literals (could be in tokens{} section) */
66     @Override
trackString(GrammarAST t)67 	protected void trackString(GrammarAST t) {
68 		// if lexer, don't allow aliasing in tokens section
69 		if ( currentRuleName==null && grammar.type==Grammar.LEXER ) {
70 			ErrorManager.grammarError(ErrorManager.MSG_CANNOT_ALIAS_TOKENS_IN_LEXER,
71 									  grammar,
72 									  t.token,
73 									  t.getText());
74 			return;
75 		}
76 		// in a plain parser grammar rule, cannot reference literals
77 		// (unless defined previously via tokenVocab option)
78 		// don't warn until we hit root grammar as may be defined there.
79 		if ( grammar.getGrammarIsRoot() &&
80 			 grammar.type==Grammar.PARSER &&
81 			 grammar.getTokenType(t.getText())== Label.INVALID )
82 		{
83 			ErrorManager.grammarError(ErrorManager.MSG_LITERAL_NOT_ASSOCIATED_WITH_LEXER_RULE,
84 									  grammar,
85 									  t.token,
86 									  t.getText());
87 		}
88 		// Don't record literals for lexers, they are things to match not tokens
89 		if ( grammar.type==Grammar.LEXER ) {
90 			return;
91 		}
92 		// otherwise add literal to token types if referenced from parser rule
93 		// or in the tokens{} section
94 		if ( (currentRuleName==null ||
95 			  Character.isLowerCase(currentRuleName.charAt(0))) &&
96 																grammar.getTokenType(t.getText())==Label.INVALID )
97 		{
98 			stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE);
99 		}
100 	}
101 
102     @Override
trackToken(GrammarAST t)103 	protected void trackToken(GrammarAST t) {
104 		// imported token names might exist, only add if new
105 		// Might have ';'=4 in vocab import and SEMI=';'. Avoid
106 		// setting to UNASSIGNED if we have loaded ';'/SEMI
107 		if ( grammar.getTokenType(t.getText())==Label.INVALID &&
108 			 tokens.get(t.getText())==null )
109 		{
110 			tokens.put(t.getText(), UNASSIGNED);
111 		}
112 	}
113 
114     @Override
trackTokenRule(GrammarAST t, GrammarAST modifier, GrammarAST block)115 	protected void trackTokenRule(GrammarAST t,
116 								  GrammarAST modifier,
117 								  GrammarAST block)
118 	{
119 		// imported token names might exist, only add if new
120 		if ( grammar.type==Grammar.LEXER || grammar.type==Grammar.COMBINED ) {
121 			if ( !Character.isUpperCase(t.getText().charAt(0)) ) {
122 				return;
123 			}
124 			if ( t.getText().equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) {
125 				// don't add Tokens rule
126 				return;
127 			}
128 
129 			// track all lexer rules so we can look for token refs w/o
130 			// associated lexer rules.
131 			grammar.composite.lexerRules.add(t.getText());
132 
133 			int existing = grammar.getTokenType(t.getText());
134 			if ( existing==Label.INVALID ) {
135 				tokens.put(t.getText(), UNASSIGNED);
136 			}
137 			// look for "<TOKEN> : <literal> ;" pattern
138 			// (can have optional action last)
139 			if ( block.hasSameTreeStructure(charAlias) ||
140 				 block.hasSameTreeStructure(stringAlias) ||
141 				 block.hasSameTreeStructure(charAlias2) ||
142 				 block.hasSameTreeStructure(stringAlias2) )
143 			{
144 				tokenRuleDefs.add(t.getText());
145 				/*
146 			Grammar parent = grammar.composite.getDelegator(grammar);
147 			boolean importedByParserOrCombined =
148 				parent!=null &&
149 				(parent.type==Grammar.LEXER||parent.type==Grammar.PARSER);
150 				*/
151 				if ( grammar.type==Grammar.COMBINED || grammar.type==Grammar.LEXER ) {
152 					// only call this rule an alias if combined or lexer
153 					alias(t, (GrammarAST)block.getChild(0).getChild(0));
154 				}
155 			}
156 		}
157 		// else error
158 	}
159 
160     @Override
alias(GrammarAST t, GrammarAST s)161 	protected void alias(GrammarAST t, GrammarAST s) {
162 		String tokenID = t.getText();
163 		String literal = s.getText();
164 		String prevAliasLiteralID = aliasesReverseIndex.get(literal);
165 		if ( prevAliasLiteralID!=null ) { // we've seen this literal before
166 			if ( tokenID.equals(prevAliasLiteralID) ) {
167 				// duplicate but identical alias; might be tokens {A='a'} and
168 				// lexer rule A : 'a' ;  Is ok, just return
169 				return;
170 			}
171 
172 			// give error unless both are rules (ok if one is in tokens section)
173 			if ( !(tokenRuleDefs.contains(tokenID) && tokenRuleDefs.contains(prevAliasLiteralID)) )
174 			{
175 				// don't allow alias if A='a' in tokens section and B : 'a'; is rule.
176 				// Allow if both are rules.  Will get DFA nondeterminism error later.
177 				ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_CONFLICT,
178 										  grammar,
179 										  t.token,
180 										  tokenID+"="+literal,
181 										  prevAliasLiteralID);
182 			}
183 			return; // don't do the alias
184 		}
185 		int existingLiteralType = grammar.getTokenType(literal);
186 		if ( existingLiteralType !=Label.INVALID ) {
187 			// we've seen this before from a tokenVocab most likely
188 			// don't assign a new token type; use existingLiteralType.
189 			tokens.put(tokenID, existingLiteralType);
190 		}
191 		String prevAliasTokenID = aliases.get(tokenID);
192 		if ( prevAliasTokenID!=null ) {
193 			ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_REASSIGNMENT,
194 									  grammar,
195 									  t.token,
196 									  tokenID+"="+literal,
197 									  prevAliasTokenID);
198 			return; // don't do the alias
199 		}
200 		aliases.put(tokenID, literal);
201 		aliasesReverseIndex.put(literal, tokenID);
202 	}
203 
204     @Override
defineTokens(Grammar root)205 	public void defineTokens(Grammar root) {
206 /*
207 	System.out.println("stringLiterals="+stringLiterals);
208 	System.out.println("tokens="+tokens);
209 	System.out.println("aliases="+aliases);
210 	System.out.println("aliasesReverseIndex="+aliasesReverseIndex);
211 */
212 
213 		assignTokenIDTypes(root);
214 
215 		aliasTokenIDsAndLiterals(root);
216 
217 		assignStringTypes(root);
218 
219 /*
220 	System.out.println("stringLiterals="+stringLiterals);
221 	System.out.println("tokens="+tokens);
222 	System.out.println("aliases="+aliases);
223 */
224 		defineTokenNamesAndLiteralsInGrammar(root);
225 	}
226 
227 /*
228 protected void defineStringLiteralsFromDelegates() {
229 	 if ( grammar.getGrammarIsMaster() && grammar.type==Grammar.COMBINED ) {
230 		 List<Grammar> delegates = grammar.getDelegates();
231 		 System.out.println("delegates in master combined: "+delegates);
232 		 for (int i = 0; i < delegates.size(); i++) {
233 			 Grammar d = (Grammar) delegates.get(i);
234 			 Set<String> literals = d.getStringLiterals();
235 			 for (Iterator it = literals.iterator(); it.hasNext();) {
236 				 String literal = (String) it.next();
237 				 System.out.println("literal "+literal);
238 				 int ttype = grammar.getTokenType(literal);
239 				 grammar.defineLexerRuleForStringLiteral(literal, ttype);
240 			 }
241 		 }
242 	 }
243 }
244 */
245 
246     @Override
assignStringTypes(Grammar root)247 	protected void assignStringTypes(Grammar root) {
248 		// walk string literals assigning types to unassigned ones
249 		for (Map.Entry<String, Integer> entry : stringLiterals.entrySet()) {
250 			String lit = entry.getKey();
251 			Integer oldTypeI = entry.getValue();
252 			int oldType = oldTypeI;
253 			if ( oldType<Label.MIN_TOKEN_TYPE ) {
254 				Integer typeI = Utils.integer(root.getNewTokenType());
255 				stringLiterals.put(lit, typeI);
256 				// if string referenced in combined grammar parser rule,
257 				// automatically define in the generated lexer
258 				root.defineLexerRuleForStringLiteral(lit, typeI);
259 			}
260 		}
261 	}
262 
263     @Override
aliasTokenIDsAndLiterals(Grammar root)264 	protected void aliasTokenIDsAndLiterals(Grammar root) {
265 		if ( root.type==Grammar.LEXER ) {
266 			return; // strings/chars are never token types in LEXER
267 		}
268 		// walk aliases if any and assign types to aliased literals if literal
269 		// was referenced
270 		for (Map.Entry<String, String> entry : aliases.entrySet()) {
271 			String tokenID = entry.getKey();
272 			String literal = entry.getValue();
273 			if ( literal.charAt(0)=='\'' && stringLiterals.get(literal)!=null ) {
274 				stringLiterals.put(literal, tokens.get(tokenID));
275 				// an alias still means you need a lexer rule for it
276 				Integer typeI = tokens.get(tokenID);
277 				if ( !tokenRuleDefs.contains(tokenID) ) {
278 					root.defineLexerRuleForAliasedStringLiteral(tokenID, literal, typeI);
279 				}
280 			}
281 		}
282 	}
283 
284     @Override
assignTokenIDTypes(Grammar root)285 	protected void assignTokenIDTypes(Grammar root) {
286 		// walk token names, assigning values if unassigned
287 		for (Map.Entry<String, Integer> entry : tokens.entrySet()) {
288 			String tokenID = entry.getKey();
289 			if ( entry.getValue()==UNASSIGNED ) {
290 				tokens.put(tokenID, Utils.integer(root.getNewTokenType()));
291 			}
292 		}
293 	}
294 
295     @Override
defineTokenNamesAndLiteralsInGrammar(Grammar root)296 	protected void defineTokenNamesAndLiteralsInGrammar(Grammar root) {
297 		for (Map.Entry<String, Integer> entry : tokens.entrySet()) {
298 			int ttype = entry.getValue();
299 			root.defineToken(entry.getKey(), ttype);
300 		}
301 		for (Map.Entry<String, Integer> entry : stringLiterals.entrySet()) {
302 			String lit = entry.getKey();
303 			int ttype = entry.getValue();
304 			root.defineToken(lit, ttype);
305 		}
306 	}
307 
308 }
309