1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.tool; 29 30 import org.antlr.analysis.Label; 31 import org.antlr.grammar.v3.AssignTokenTypesWalker; 32 import org.antlr.misc.Utils; 33 import org.antlr.runtime.tree.TreeNodeStream; 34 35 import java.util.*; 36 37 /** Move all of the functionality from assign.types.g grammar file. */ 38 public class AssignTokenTypesBehavior extends AssignTokenTypesWalker { 39 protected static final Integer UNASSIGNED = Utils.integer(-1); 40 protected static final Integer UNASSIGNED_IN_PARSER_RULE = Utils.integer(-2); 41 42 protected Map<String,Integer> stringLiterals = new TreeMap<String, Integer>(); 43 protected Map<String,Integer> tokens = new TreeMap<String, Integer>(); 44 protected Map<String,String> aliases = new TreeMap<String, String>(); 45 protected Map<String,String> aliasesReverseIndex = new HashMap<String,String>(); 46 47 /** Track actual lexer rule defs so we don't get repeated token defs in 48 * generated lexer. 49 */ 50 protected Set<String> tokenRuleDefs = new HashSet(); 51 AssignTokenTypesBehavior()52 public AssignTokenTypesBehavior() { 53 super(null); 54 } 55 56 @Override init(Grammar g)57 protected void init(Grammar g) { 58 this.grammar = g; 59 currentRuleName = null; 60 if ( stringAlias==null ) { 61 // only init once; can't statically init since we need astFactory 62 initASTPatterns(); 63 } 64 } 65 66 /** Track string literals (could be in tokens{} section) */ 67 @Override trackString(GrammarAST t)68 protected void trackString(GrammarAST t) { 69 // if lexer, don't allow aliasing in tokens section 70 if ( currentRuleName==null && grammar.type==Grammar.LEXER ) { 71 ErrorManager.grammarError(ErrorManager.MSG_CANNOT_ALIAS_TOKENS_IN_LEXER, 72 grammar, 73 t.token, 74 t.getText()); 75 return; 76 } 77 // in a plain parser grammar rule, cannot reference literals 78 // (unless defined previously via tokenVocab option) 79 // don't warn until we hit root grammar as may be defined there. 80 if ( grammar.getGrammarIsRoot() && 81 grammar.type==Grammar.PARSER && 82 grammar.getTokenType(t.getText())== Label.INVALID ) 83 { 84 ErrorManager.grammarError(ErrorManager.MSG_LITERAL_NOT_ASSOCIATED_WITH_LEXER_RULE, 85 grammar, 86 t.token, 87 t.getText()); 88 } 89 // Don't record literals for lexers, they are things to match not tokens 90 if ( grammar.type==Grammar.LEXER ) { 91 return; 92 } 93 // otherwise add literal to token types if referenced from parser rule 94 // or in the tokens{} section 95 if ( (currentRuleName==null || 96 Character.isLowerCase(currentRuleName.charAt(0))) && 97 grammar.getTokenType(t.getText())==Label.INVALID ) 98 { 99 stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE); 100 } 101 } 102 103 @Override trackToken(GrammarAST t)104 protected void trackToken(GrammarAST t) { 105 // imported token names might exist, only add if new 106 // Might have ';'=4 in vocab import and SEMI=';'. Avoid 107 // setting to UNASSIGNED if we have loaded ';'/SEMI 108 if ( grammar.getTokenType(t.getText())==Label.INVALID && 109 tokens.get(t.getText())==null ) 110 { 111 tokens.put(t.getText(), UNASSIGNED); 112 } 113 } 114 115 @Override trackTokenRule(GrammarAST t, GrammarAST modifier, GrammarAST block)116 protected void trackTokenRule(GrammarAST t, 117 GrammarAST modifier, 118 GrammarAST block) 119 { 120 // imported token names might exist, only add if new 121 if ( grammar.type==Grammar.LEXER || grammar.type==Grammar.COMBINED ) { 122 if ( !Character.isUpperCase(t.getText().charAt(0)) ) { 123 return; 124 } 125 if ( t.getText().equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) { 126 // don't add Tokens rule 127 return; 128 } 129 130 // track all lexer rules so we can look for token refs w/o 131 // associated lexer rules. 132 grammar.composite.lexerRules.add(t.getText()); 133 134 int existing = grammar.getTokenType(t.getText()); 135 if ( existing==Label.INVALID ) { 136 tokens.put(t.getText(), UNASSIGNED); 137 } 138 // look for "<TOKEN> : <literal> ;" pattern 139 // (can have optional action last) 140 if ( block.hasSameTreeStructure(charAlias) || 141 block.hasSameTreeStructure(stringAlias) || 142 block.hasSameTreeStructure(charAlias2) || 143 block.hasSameTreeStructure(stringAlias2) ) 144 { 145 tokenRuleDefs.add(t.getText()); 146 /* 147 Grammar parent = grammar.composite.getDelegator(grammar); 148 boolean importedByParserOrCombined = 149 parent!=null && 150 (parent.type==Grammar.LEXER||parent.type==Grammar.PARSER); 151 */ 152 if ( grammar.type==Grammar.COMBINED || grammar.type==Grammar.LEXER ) { 153 // only call this rule an alias if combined or lexer 154 alias(t, (GrammarAST)block.getChild(0).getChild(0)); 155 } 156 } 157 } 158 // else error 159 } 160 161 @Override alias(GrammarAST t, GrammarAST s)162 protected void alias(GrammarAST t, GrammarAST s) { 163 String tokenID = t.getText(); 164 String literal = s.getText(); 165 String prevAliasLiteralID = aliasesReverseIndex.get(literal); 166 if ( prevAliasLiteralID!=null ) { // we've seen this literal before 167 if ( tokenID.equals(prevAliasLiteralID) ) { 168 // duplicate but identical alias; might be tokens {A='a'} and 169 // lexer rule A : 'a' ; Is ok, just return 170 return; 171 } 172 173 // give error unless both are rules (ok if one is in tokens section) 174 if ( !(tokenRuleDefs.contains(tokenID) && tokenRuleDefs.contains(prevAliasLiteralID)) ) 175 { 176 // don't allow alias if A='a' in tokens section and B : 'a'; is rule. 177 // Allow if both are rules. Will get DFA nondeterminism error later. 178 ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_CONFLICT, 179 grammar, 180 t.token, 181 tokenID+"="+literal, 182 prevAliasLiteralID); 183 } 184 return; // don't do the alias 185 } 186 int existingLiteralType = grammar.getTokenType(literal); 187 if ( existingLiteralType !=Label.INVALID ) { 188 // we've seen this before from a tokenVocab most likely 189 // don't assign a new token type; use existingLiteralType. 190 tokens.put(tokenID, existingLiteralType); 191 } 192 String prevAliasTokenID = aliases.get(tokenID); 193 if ( prevAliasTokenID!=null ) { 194 ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_REASSIGNMENT, 195 grammar, 196 t.token, 197 tokenID+"="+literal, 198 prevAliasTokenID); 199 return; // don't do the alias 200 } 201 aliases.put(tokenID, literal); 202 aliasesReverseIndex.put(literal, tokenID); 203 } 204 205 @Override defineTokens(Grammar root)206 public void defineTokens(Grammar root) { 207 /* 208 System.out.println("stringLiterals="+stringLiterals); 209 System.out.println("tokens="+tokens); 210 System.out.println("aliases="+aliases); 211 System.out.println("aliasesReverseIndex="+aliasesReverseIndex); 212 */ 213 214 assignTokenIDTypes(root); 215 216 aliasTokenIDsAndLiterals(root); 217 218 assignStringTypes(root); 219 220 /* 221 System.out.println("stringLiterals="+stringLiterals); 222 System.out.println("tokens="+tokens); 223 System.out.println("aliases="+aliases); 224 */ 225 defineTokenNamesAndLiteralsInGrammar(root); 226 } 227 228 /* 229 protected void defineStringLiteralsFromDelegates() { 230 if ( grammar.getGrammarIsMaster() && grammar.type==Grammar.COMBINED ) { 231 List<Grammar> delegates = grammar.getDelegates(); 232 System.out.println("delegates in master combined: "+delegates); 233 for (int i = 0; i < delegates.size(); i++) { 234 Grammar d = (Grammar) delegates.get(i); 235 Set<String> literals = d.getStringLiterals(); 236 for (Iterator it = literals.iterator(); it.hasNext();) { 237 String literal = (String) it.next(); 238 System.out.println("literal "+literal); 239 int ttype = grammar.getTokenType(literal); 240 grammar.defineLexerRuleForStringLiteral(literal, ttype); 241 } 242 } 243 } 244 } 245 */ 246 247 @Override assignStringTypes(Grammar root)248 protected void assignStringTypes(Grammar root) { 249 // walk string literals assigning types to unassigned ones 250 Set s = stringLiterals.keySet(); 251 for (Iterator it = s.iterator(); it.hasNext();) { 252 String lit = (String) it.next(); 253 Integer oldTypeI = (Integer)stringLiterals.get(lit); 254 int oldType = oldTypeI.intValue(); 255 if ( oldType<Label.MIN_TOKEN_TYPE ) { 256 Integer typeI = Utils.integer(root.getNewTokenType()); 257 stringLiterals.put(lit, typeI); 258 // if string referenced in combined grammar parser rule, 259 // automatically define in the generated lexer 260 root.defineLexerRuleForStringLiteral(lit, typeI.intValue()); 261 } 262 } 263 } 264 265 @Override aliasTokenIDsAndLiterals(Grammar root)266 protected void aliasTokenIDsAndLiterals(Grammar root) { 267 if ( root.type==Grammar.LEXER ) { 268 return; // strings/chars are never token types in LEXER 269 } 270 // walk aliases if any and assign types to aliased literals if literal 271 // was referenced 272 Set s = aliases.keySet(); 273 for (Iterator it = s.iterator(); it.hasNext();) { 274 String tokenID = (String) it.next(); 275 String literal = (String)aliases.get(tokenID); 276 if ( literal.charAt(0)=='\'' && stringLiterals.get(literal)!=null ) { 277 stringLiterals.put(literal, tokens.get(tokenID)); 278 // an alias still means you need a lexer rule for it 279 Integer typeI = (Integer)tokens.get(tokenID); 280 if ( !tokenRuleDefs.contains(tokenID) ) { 281 root.defineLexerRuleForAliasedStringLiteral(tokenID, literal, typeI.intValue()); 282 } 283 } 284 } 285 } 286 287 @Override assignTokenIDTypes(Grammar root)288 protected void assignTokenIDTypes(Grammar root) { 289 // walk token names, assigning values if unassigned 290 Set s = tokens.keySet(); 291 for (Iterator it = s.iterator(); it.hasNext();) { 292 String tokenID = (String) it.next(); 293 if ( tokens.get(tokenID)==UNASSIGNED ) { 294 tokens.put(tokenID, Utils.integer(root.getNewTokenType())); 295 } 296 } 297 } 298 299 @Override defineTokenNamesAndLiteralsInGrammar(Grammar root)300 protected void defineTokenNamesAndLiteralsInGrammar(Grammar root) { 301 Set s = tokens.keySet(); 302 for (Iterator it = s.iterator(); it.hasNext();) { 303 String tokenID = (String) it.next(); 304 int ttype = ((Integer)tokens.get(tokenID)).intValue(); 305 root.defineToken(tokenID, ttype); 306 } 307 s = stringLiterals.keySet(); 308 for (Iterator it = s.iterator(); it.hasNext();) { 309 String lit = (String) it.next(); 310 int ttype = ((Integer)stringLiterals.get(lit)).intValue(); 311 root.defineToken(lit, ttype); 312 } 313 } 314 315 } 316