1 /* 2 * [The "BSD license"] 3 * Copyright (c) 2010 Terence Parr 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 package org.antlr.tool; 29 30 import org.antlr.analysis.Label; 31 import org.antlr.grammar.v3.AssignTokenTypesWalker; 32 import org.antlr.misc.Utils; 33 34 import java.util.*; 35 36 /** Move all of the functionality from assign.types.g grammar file. */ 37 public class AssignTokenTypesBehavior extends AssignTokenTypesWalker { 38 protected static final Integer UNASSIGNED = Utils.integer(-1); 39 protected static final Integer UNASSIGNED_IN_PARSER_RULE = Utils.integer(-2); 40 41 protected Map<String,Integer> stringLiterals = new TreeMap<String, Integer>(); 42 protected Map<String,Integer> tokens = new TreeMap<String, Integer>(); 43 protected Map<String,String> aliases = new TreeMap<String, String>(); 44 protected Map<String,String> aliasesReverseIndex = new HashMap<String,String>(); 45 46 /** Track actual lexer rule defs so we don't get repeated token defs in 47 * generated lexer. 48 */ 49 protected Set<String> tokenRuleDefs = new HashSet<String>(); 50 AssignTokenTypesBehavior()51 public AssignTokenTypesBehavior() { 52 super(null); 53 } 54 55 @Override init(Grammar g)56 protected void init(Grammar g) { 57 this.grammar = g; 58 currentRuleName = null; 59 if ( stringAlias==null ) { 60 // only init once; can't statically init since we need astFactory 61 initASTPatterns(); 62 } 63 } 64 65 /** Track string literals (could be in tokens{} section) */ 66 @Override trackString(GrammarAST t)67 protected void trackString(GrammarAST t) { 68 // if lexer, don't allow aliasing in tokens section 69 if ( currentRuleName==null && grammar.type==Grammar.LEXER ) { 70 ErrorManager.grammarError(ErrorManager.MSG_CANNOT_ALIAS_TOKENS_IN_LEXER, 71 grammar, 72 t.token, 73 t.getText()); 74 return; 75 } 76 // in a plain parser grammar rule, cannot reference literals 77 // (unless defined previously via tokenVocab option) 78 // don't warn until we hit root grammar as may be defined there. 79 if ( grammar.getGrammarIsRoot() && 80 grammar.type==Grammar.PARSER && 81 grammar.getTokenType(t.getText())== Label.INVALID ) 82 { 83 ErrorManager.grammarError(ErrorManager.MSG_LITERAL_NOT_ASSOCIATED_WITH_LEXER_RULE, 84 grammar, 85 t.token, 86 t.getText()); 87 } 88 // Don't record literals for lexers, they are things to match not tokens 89 if ( grammar.type==Grammar.LEXER ) { 90 return; 91 } 92 // otherwise add literal to token types if referenced from parser rule 93 // or in the tokens{} section 94 if ( (currentRuleName==null || 95 Character.isLowerCase(currentRuleName.charAt(0))) && 96 grammar.getTokenType(t.getText())==Label.INVALID ) 97 { 98 stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE); 99 } 100 } 101 102 @Override trackToken(GrammarAST t)103 protected void trackToken(GrammarAST t) { 104 // imported token names might exist, only add if new 105 // Might have ';'=4 in vocab import and SEMI=';'. Avoid 106 // setting to UNASSIGNED if we have loaded ';'/SEMI 107 if ( grammar.getTokenType(t.getText())==Label.INVALID && 108 tokens.get(t.getText())==null ) 109 { 110 tokens.put(t.getText(), UNASSIGNED); 111 } 112 } 113 114 @Override trackTokenRule(GrammarAST t, GrammarAST modifier, GrammarAST block)115 protected void trackTokenRule(GrammarAST t, 116 GrammarAST modifier, 117 GrammarAST block) 118 { 119 // imported token names might exist, only add if new 120 if ( grammar.type==Grammar.LEXER || grammar.type==Grammar.COMBINED ) { 121 if ( !Character.isUpperCase(t.getText().charAt(0)) ) { 122 return; 123 } 124 if ( t.getText().equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ) { 125 // don't add Tokens rule 126 return; 127 } 128 129 // track all lexer rules so we can look for token refs w/o 130 // associated lexer rules. 131 grammar.composite.lexerRules.add(t.getText()); 132 133 int existing = grammar.getTokenType(t.getText()); 134 if ( existing==Label.INVALID ) { 135 tokens.put(t.getText(), UNASSIGNED); 136 } 137 // look for "<TOKEN> : <literal> ;" pattern 138 // (can have optional action last) 139 if ( block.hasSameTreeStructure(charAlias) || 140 block.hasSameTreeStructure(stringAlias) || 141 block.hasSameTreeStructure(charAlias2) || 142 block.hasSameTreeStructure(stringAlias2) ) 143 { 144 tokenRuleDefs.add(t.getText()); 145 /* 146 Grammar parent = grammar.composite.getDelegator(grammar); 147 boolean importedByParserOrCombined = 148 parent!=null && 149 (parent.type==Grammar.LEXER||parent.type==Grammar.PARSER); 150 */ 151 if ( grammar.type==Grammar.COMBINED || grammar.type==Grammar.LEXER ) { 152 // only call this rule an alias if combined or lexer 153 alias(t, (GrammarAST)block.getChild(0).getChild(0)); 154 } 155 } 156 } 157 // else error 158 } 159 160 @Override alias(GrammarAST t, GrammarAST s)161 protected void alias(GrammarAST t, GrammarAST s) { 162 String tokenID = t.getText(); 163 String literal = s.getText(); 164 String prevAliasLiteralID = aliasesReverseIndex.get(literal); 165 if ( prevAliasLiteralID!=null ) { // we've seen this literal before 166 if ( tokenID.equals(prevAliasLiteralID) ) { 167 // duplicate but identical alias; might be tokens {A='a'} and 168 // lexer rule A : 'a' ; Is ok, just return 169 return; 170 } 171 172 // give error unless both are rules (ok if one is in tokens section) 173 if ( !(tokenRuleDefs.contains(tokenID) && tokenRuleDefs.contains(prevAliasLiteralID)) ) 174 { 175 // don't allow alias if A='a' in tokens section and B : 'a'; is rule. 176 // Allow if both are rules. Will get DFA nondeterminism error later. 177 ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_CONFLICT, 178 grammar, 179 t.token, 180 tokenID+"="+literal, 181 prevAliasLiteralID); 182 } 183 return; // don't do the alias 184 } 185 int existingLiteralType = grammar.getTokenType(literal); 186 if ( existingLiteralType !=Label.INVALID ) { 187 // we've seen this before from a tokenVocab most likely 188 // don't assign a new token type; use existingLiteralType. 189 tokens.put(tokenID, existingLiteralType); 190 } 191 String prevAliasTokenID = aliases.get(tokenID); 192 if ( prevAliasTokenID!=null ) { 193 ErrorManager.grammarError(ErrorManager.MSG_TOKEN_ALIAS_REASSIGNMENT, 194 grammar, 195 t.token, 196 tokenID+"="+literal, 197 prevAliasTokenID); 198 return; // don't do the alias 199 } 200 aliases.put(tokenID, literal); 201 aliasesReverseIndex.put(literal, tokenID); 202 } 203 204 @Override defineTokens(Grammar root)205 public void defineTokens(Grammar root) { 206 /* 207 System.out.println("stringLiterals="+stringLiterals); 208 System.out.println("tokens="+tokens); 209 System.out.println("aliases="+aliases); 210 System.out.println("aliasesReverseIndex="+aliasesReverseIndex); 211 */ 212 213 assignTokenIDTypes(root); 214 215 aliasTokenIDsAndLiterals(root); 216 217 assignStringTypes(root); 218 219 /* 220 System.out.println("stringLiterals="+stringLiterals); 221 System.out.println("tokens="+tokens); 222 System.out.println("aliases="+aliases); 223 */ 224 defineTokenNamesAndLiteralsInGrammar(root); 225 } 226 227 /* 228 protected void defineStringLiteralsFromDelegates() { 229 if ( grammar.getGrammarIsMaster() && grammar.type==Grammar.COMBINED ) { 230 List<Grammar> delegates = grammar.getDelegates(); 231 System.out.println("delegates in master combined: "+delegates); 232 for (int i = 0; i < delegates.size(); i++) { 233 Grammar d = (Grammar) delegates.get(i); 234 Set<String> literals = d.getStringLiterals(); 235 for (Iterator it = literals.iterator(); it.hasNext();) { 236 String literal = (String) it.next(); 237 System.out.println("literal "+literal); 238 int ttype = grammar.getTokenType(literal); 239 grammar.defineLexerRuleForStringLiteral(literal, ttype); 240 } 241 } 242 } 243 } 244 */ 245 246 @Override assignStringTypes(Grammar root)247 protected void assignStringTypes(Grammar root) { 248 // walk string literals assigning types to unassigned ones 249 for (Map.Entry<String, Integer> entry : stringLiterals.entrySet()) { 250 String lit = entry.getKey(); 251 Integer oldTypeI = entry.getValue(); 252 int oldType = oldTypeI; 253 if ( oldType<Label.MIN_TOKEN_TYPE ) { 254 Integer typeI = Utils.integer(root.getNewTokenType()); 255 stringLiterals.put(lit, typeI); 256 // if string referenced in combined grammar parser rule, 257 // automatically define in the generated lexer 258 root.defineLexerRuleForStringLiteral(lit, typeI); 259 } 260 } 261 } 262 263 @Override aliasTokenIDsAndLiterals(Grammar root)264 protected void aliasTokenIDsAndLiterals(Grammar root) { 265 if ( root.type==Grammar.LEXER ) { 266 return; // strings/chars are never token types in LEXER 267 } 268 // walk aliases if any and assign types to aliased literals if literal 269 // was referenced 270 for (Map.Entry<String, String> entry : aliases.entrySet()) { 271 String tokenID = entry.getKey(); 272 String literal = entry.getValue(); 273 if ( literal.charAt(0)=='\'' && stringLiterals.get(literal)!=null ) { 274 stringLiterals.put(literal, tokens.get(tokenID)); 275 // an alias still means you need a lexer rule for it 276 Integer typeI = tokens.get(tokenID); 277 if ( !tokenRuleDefs.contains(tokenID) ) { 278 root.defineLexerRuleForAliasedStringLiteral(tokenID, literal, typeI); 279 } 280 } 281 } 282 } 283 284 @Override assignTokenIDTypes(Grammar root)285 protected void assignTokenIDTypes(Grammar root) { 286 // walk token names, assigning values if unassigned 287 for (Map.Entry<String, Integer> entry : tokens.entrySet()) { 288 String tokenID = entry.getKey(); 289 if ( entry.getValue()==UNASSIGNED ) { 290 tokens.put(tokenID, Utils.integer(root.getNewTokenType())); 291 } 292 } 293 } 294 295 @Override defineTokenNamesAndLiteralsInGrammar(Grammar root)296 protected void defineTokenNamesAndLiteralsInGrammar(Grammar root) { 297 for (Map.Entry<String, Integer> entry : tokens.entrySet()) { 298 int ttype = entry.getValue(); 299 root.defineToken(entry.getKey(), ttype); 300 } 301 for (Map.Entry<String, Integer> entry : stringLiterals.entrySet()) { 302 String lit = entry.getKey(); 303 int ttype = entry.getValue(); 304 root.defineToken(lit, ttype); 305 } 306 } 307 308 } 309