• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * [The "BSD license"]
3  *  Copyright (c) 2010 Terence Parr
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions
8  *  are met:
9  *  1. Redistributions of source code must retain the above copyright
10  *      notice, this list of conditions and the following disclaimer.
11  *  2. Redistributions in binary form must reproduce the above copyright
12  *      notice, this list of conditions and the following disclaimer in the
13  *      documentation and/or other materials provided with the distribution.
14  *  3. The name of the author may not be used to endorse or promote products
15  *      derived from this software without specific prior written permission.
16  *
17  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 package org.antlr.tool;
29 
30 import org.antlr.analysis.Label;
31 import org.antlr.grammar.v3.AssignTokenTypesWalker;
32 import org.antlr.misc.Utils;
33 import org.antlr.runtime.tree.TreeNodeStream;
34 
35 import java.util.*;
36 
37 /** Move all of the functionality from assign.types.g grammar file. */
38 public class AssignTokenTypesBehavior extends AssignTokenTypesWalker {
39 	protected static final Integer UNASSIGNED = Utils.integer(-1);
40 	protected static final Integer UNASSIGNED_IN_PARSER_RULE = Utils.integer(-2);
41 
42 	protected Map<String,Integer> stringLiterals = new TreeMap<String, Integer>();
43 	protected Map<String,Integer> tokens = new TreeMap<String, Integer>();
44 	protected Map<String,String> aliases = new TreeMap<String, String>();
45 	protected Map<String,String> aliasesReverseIndex = new HashMap<String,String>();
46 
47 	/** Track actual lexer rule defs so we don't get repeated token defs in
48 	 *  generated lexer.
49 	 */
50 	protected Set<String> tokenRuleDefs = new HashSet();
51 
AssignTokenTypesBehavior()52 	public AssignTokenTypesBehavior() {
53 		super(null);
54 	}
55 
56     @Override
init(Grammar g)57 	protected void init(Grammar g) {
58 		this.grammar = g;
59 		currentRuleName = null;
60 		if ( stringAlias==null ) {
61 			// only init once; can't statically init since we need astFactory
62 			initASTPatterns();
63 		}
64 	}
65 
66 	/** Track string literals (could be in tokens{} section) */
67     @Override
trackString(GrammarAST t)68 	protected void trackString(GrammarAST t) {
69 		// if lexer, don't allow aliasing in tokens section
70 		if ( currentRuleName==null && grammar.type==Grammar.LEXER ) {
71 			ErrorManager.grammarError(ErrorManager.MSG_CANNOT_ALIAS_TOKENS_IN_LEXER,
72 									  grammar,
73 									  t.token,
74 									  t.getText());
75 			return;
76 		}
77 		// in a plain parser grammar rule, cannot reference literals
78 		// (unless defined previously via tokenVocab option)
79 		// don't warn until we hit root grammar as may be defined there.
80 		if ( grammar.getGrammarIsRoot() &&
81 			 grammar.type==Grammar.PARSER &&
82 			 grammar.getTokenType(t.getText())== Label.INVALID )
83 		{
84 			ErrorManager.grammarError(ErrorManager.MSG_LITERAL_NOT_ASSOCIATED_WITH_LEXER_RULE,
85 									  grammar,
86 									  t.token,
87 									  t.getText());
88 		}
89 		// Don't record literals for lexers, they are things to match not tokens
90 		if ( grammar.type==Grammar.LEXER ) {
91 			return;
92 		}
93 		// otherwise add literal to token types if referenced from parser rule
94 		// or in the tokens{} section
95 		if ( (currentRuleName==null ||
96 			  Character.isLowerCase(currentRuleName.charAt(0))) &&
97 																grammar.getTokenType(t.getText())==Label.INVALID )
98 		{
99 			stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE);
100 		}
101 	}
102 
103     @Override
trackToken(GrammarAST t)104 	protected void trackToken(GrammarAST t) {
105 		// imported token names might exist, only add if new
106 		// Might have ';'=4 in vocab import and SEMI=';'. Avoid
107 		// setting to UNASSIGNED if we have loaded ';'/SEMI
108 		if ( grammar.getTokenType(t.getText())==Label.INVALID &&
109 			 tokens.get(t.getText())==null )
110 		{
111 			tokens.put(t.getText(), UNASSIGNED);
112 		}
113 	}
114 
115     @Override
trackTokenRule(GrammarAST t, GrammarAST modifier, GrammarAST block)116 	protected void trackTokenRule(GrammarAST t,
117 								  GrammarAST modifier,
118 								  GrammarAST block)
119 	{
120 		// imported token names might exist, only add if new
121 		if ( grammar.type==Grammar.LEXER || grammar.type==Grammar.COMBINED ) {
122 			if ( !Character.isUpperCase(t.getText().charAt(0)) ) {
123 				return;
124 			}
125 			if ( t.getText().equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) {
126 				// don't add Tokens rule
127 				return;
128 			}
129 
130 			// track all lexer rules so we can look for token refs w/o
131 			// associated lexer rules.
132 			grammar.composite.lexerRules.add(t.getText());
133 
134 			int existing = grammar.getTokenType(t.getText());
135 			if ( existing==Label.INVALID ) {
136 				tokens.put(t.getText(), UNASSIGNED);
137 			}
138 			// look for "<TOKEN> : <literal> ;" pattern
139 			// (can have optional action last)
140 			if ( block.hasSameTreeStructure(charAlias) ||
141 				 block.hasSameTreeStructure(stringAlias) ||
142 				 block.hasSameTreeStructure(charAlias2) ||
143 				 block.hasSameTreeStructure(stringAlias2) )
144 			{
145 				tokenRuleDefs.add(t.getText());
146 				/*
147 			Grammar parent = grammar.composite.getDelegator(grammar);
148 			boolean importedByParserOrCombined =
149 				parent!=null &&
150 				(parent.type==Grammar.LEXER||parent.type==Grammar.PARSER);
151 				*/
152 				if ( grammar.type==Grammar.COMBINED || grammar.type==Grammar.LEXER ) {
153 					// only call this rule an alias if combined or lexer
154 					alias(t, (GrammarAST)block.getChild(0).getChild(0));
155 				}
156 			}
157 		}
158 		// else error
159 	}
160 
161     @Override
alias(GrammarAST t, GrammarAST s)162 	protected void alias(GrammarAST t, GrammarAST s) {
163 		String tokenID = t.getText();
164 		String literal = s.getText();
165 		String prevAliasLiteralID = aliasesReverseIndex.get(literal);
166 		if ( prevAliasLiteralID!=null ) { // we've seen this literal before
167 			if ( tokenID.equals(prevAliasLiteralID) ) {
168 				// duplicate but identical alias; might be tokens {A='a'} and
169 				// lexer rule A : 'a' ;  Is ok, just return
170 				return;
171 			}
172 
173 			// give error unless both are rules (ok if one is in tokens section)
174 			if ( !(tokenRuleDefs.contains(tokenID) && tokenRuleDefs.contains(prevAliasLiteralID)) )
175 			{
176 				// don't allow alias if A='a' in tokens section and B : 'a'; is rule.
177 				// Allow if both are rules.  Will get DFA nondeterminism error later.
178 				ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_CONFLICT,
179 										  grammar,
180 										  t.token,
181 										  tokenID+"="+literal,
182 										  prevAliasLiteralID);
183 			}
184 			return; // don't do the alias
185 		}
186 		int existingLiteralType = grammar.getTokenType(literal);
187 		if ( existingLiteralType !=Label.INVALID ) {
188 			// we've seen this before from a tokenVocab most likely
189 			// don't assign a new token type; use existingLiteralType.
190 			tokens.put(tokenID, existingLiteralType);
191 		}
192 		String prevAliasTokenID = aliases.get(tokenID);
193 		if ( prevAliasTokenID!=null ) {
194 			ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_REASSIGNMENT,
195 									  grammar,
196 									  t.token,
197 									  tokenID+"="+literal,
198 									  prevAliasTokenID);
199 			return; // don't do the alias
200 		}
201 		aliases.put(tokenID, literal);
202 		aliasesReverseIndex.put(literal, tokenID);
203 	}
204 
205     @Override
defineTokens(Grammar root)206 	public void defineTokens(Grammar root) {
207 /*
208 	System.out.println("stringLiterals="+stringLiterals);
209 	System.out.println("tokens="+tokens);
210 	System.out.println("aliases="+aliases);
211 	System.out.println("aliasesReverseIndex="+aliasesReverseIndex);
212 */
213 
214 		assignTokenIDTypes(root);
215 
216 		aliasTokenIDsAndLiterals(root);
217 
218 		assignStringTypes(root);
219 
220 /*
221 	System.out.println("stringLiterals="+stringLiterals);
222 	System.out.println("tokens="+tokens);
223 	System.out.println("aliases="+aliases);
224 */
225 		defineTokenNamesAndLiteralsInGrammar(root);
226 	}
227 
228 /*
229 protected void defineStringLiteralsFromDelegates() {
230 	 if ( grammar.getGrammarIsMaster() && grammar.type==Grammar.COMBINED ) {
231 		 List<Grammar> delegates = grammar.getDelegates();
232 		 System.out.println("delegates in master combined: "+delegates);
233 		 for (int i = 0; i < delegates.size(); i++) {
234 			 Grammar d = (Grammar) delegates.get(i);
235 			 Set<String> literals = d.getStringLiterals();
236 			 for (Iterator it = literals.iterator(); it.hasNext();) {
237 				 String literal = (String) it.next();
238 				 System.out.println("literal "+literal);
239 				 int ttype = grammar.getTokenType(literal);
240 				 grammar.defineLexerRuleForStringLiteral(literal, ttype);
241 			 }
242 		 }
243 	 }
244 }
245 */
246 
247     @Override
assignStringTypes(Grammar root)248 	protected void assignStringTypes(Grammar root) {
249 		// walk string literals assigning types to unassigned ones
250 		Set s = stringLiterals.keySet();
251 		for (Iterator it = s.iterator(); it.hasNext();) {
252 			String lit = (String) it.next();
253 			Integer oldTypeI = (Integer)stringLiterals.get(lit);
254 			int oldType = oldTypeI.intValue();
255 			if ( oldType<Label.MIN_TOKEN_TYPE ) {
256 				Integer typeI = Utils.integer(root.getNewTokenType());
257 				stringLiterals.put(lit, typeI);
258 				// if string referenced in combined grammar parser rule,
259 				// automatically define in the generated lexer
260 				root.defineLexerRuleForStringLiteral(lit, typeI.intValue());
261 			}
262 		}
263 	}
264 
265     @Override
aliasTokenIDsAndLiterals(Grammar root)266 	protected void aliasTokenIDsAndLiterals(Grammar root) {
267 		if ( root.type==Grammar.LEXER ) {
268 			return; // strings/chars are never token types in LEXER
269 		}
270 		// walk aliases if any and assign types to aliased literals if literal
271 		// was referenced
272 		Set s = aliases.keySet();
273 		for (Iterator it = s.iterator(); it.hasNext();) {
274 			String tokenID = (String) it.next();
275 			String literal = (String)aliases.get(tokenID);
276 			if ( literal.charAt(0)=='\'' && stringLiterals.get(literal)!=null ) {
277 				stringLiterals.put(literal, tokens.get(tokenID));
278 				// an alias still means you need a lexer rule for it
279 				Integer typeI = (Integer)tokens.get(tokenID);
280 				if ( !tokenRuleDefs.contains(tokenID) ) {
281 					root.defineLexerRuleForAliasedStringLiteral(tokenID, literal, typeI.intValue());
282 				}
283 			}
284 		}
285 	}
286 
287     @Override
assignTokenIDTypes(Grammar root)288 	protected void assignTokenIDTypes(Grammar root) {
289 		// walk token names, assigning values if unassigned
290 		Set s = tokens.keySet();
291 		for (Iterator it = s.iterator(); it.hasNext();) {
292 			String tokenID = (String) it.next();
293 			if ( tokens.get(tokenID)==UNASSIGNED ) {
294 				tokens.put(tokenID, Utils.integer(root.getNewTokenType()));
295 			}
296 		}
297 	}
298 
299     @Override
defineTokenNamesAndLiteralsInGrammar(Grammar root)300 	protected void defineTokenNamesAndLiteralsInGrammar(Grammar root) {
301 		Set s = tokens.keySet();
302 		for (Iterator it = s.iterator(); it.hasNext();) {
303 			String tokenID = (String) it.next();
304 			int ttype = ((Integer)tokens.get(tokenID)).intValue();
305 			root.defineToken(tokenID, ttype);
306 		}
307 		s = stringLiterals.keySet();
308 		for (Iterator it = s.iterator(); it.hasNext();) {
309 			String lit = (String) it.next();
310 			int ttype = ((Integer)stringLiterals.get(lit)).intValue();
311 			root.defineToken(lit, ttype);
312 		}
313 	}
314 
315 }
316