/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.turbine.parse;

import com.google.common.base.Function;
import com.google.common.collect.Lists;
import com.google.common.escape.SourceCodeEscapers;
import com.sun.tools.javac.parser.Scanner;
import com.sun.tools.javac.parser.ScannerFactory;
import com.sun.tools.javac.parser.Tokens;
import com.sun.tools.javac.util.Context;
import java.util.ArrayList;
import java.util.List;

/**
 * A javac-based reference lexer.
 *
 * <p>Runs javac's own {@link Scanner} over a source string and renders every token it produces as
 * a short string such as {@code IDENT(foo)}, {@code LBRACK} or {@code INT_LITERAL(42)}.
 */
public final class JavacLexer {

  /**
   * Tokenizes {@code input} with javac's scanner and returns the printed form of each token.
   *
   * <p>The scanner is created with doc-comment retention disabled. Scanning stops after the first
   * {@code EOF} token, which is included as the last element of the result.
   *
   * <p>The returned list is a {@link Lists#transform} view over the collected tokens, so each
   * element's string is produced by {@code printToken} when that element is read.
   *
   * @param input the source text to lex
   * @return one string per token, in scan order, ending with {@code "EOF"}
   */
  static List<String> javacLex(final String input) {
    Scanner lexer =
        ScannerFactory.instance(new Context()).newScanner(input, /*keepDocComments=*/ false);

    List<Tokens.Token> scanned = new ArrayList<>();
    Tokens.Token current;
    do {
      lexer.nextToken();
      current = lexer.token();
      scanned.add(current);
    } while (current.kind != Tokens.TokenKind.EOF);

    Function<Tokens.Token, String> printer =
        new Function<Tokens.Token, String>() {
          @Override
          public String apply(Tokens.Token token) {
            return printToken(input, token);
          }
        };
    return Lists.transform(scanned, printer);
  }

  /**
   * Renders a single javac token as a string.
   *
   * <p>Identifiers and literals carry their text in parentheses; keywords, separators and
   * operators map to a fixed label. Any token kind that has no explicit case is rendered with
   * {@code TokenKind.toString()}.
   *
   * @param input the source text the token was scanned from, used to recover numeric literal text
   * @param token the token to render
   * @return the printed form of {@code token}
   */
  private static String printToken(String input, Tokens.Token token) {
    switch (token.kind) {
      case IDENTIFIER:
        return "IDENT(" + token.name() + ")";

      // Literals: strings and chars print their escaped value, numbers print their source text.
      case STRINGLITERAL:
        return "STRING_LITERAL(" + escapedValue(token) + ")";
      case CHARLITERAL:
        return "CHAR_LITERAL(" + escapedValue(token) + ")";
      case INTLITERAL:
        return "INT_LITERAL(" + sourceText(input, token) + ")";
      case LONGLITERAL:
        return "LONG_LITERAL(" + sourceText(input, token) + ")";
      case FLOATLITERAL:
        return "FLOAT_LITERAL(" + sourceText(input, token) + ")";
      case DOUBLELITERAL:
        return "DOUBLE_LITERAL(" + sourceText(input, token) + ")";

      // Kinds whose printed label differs from javac's enum constant name.
      case LBRACKET:
        return "LBRACK";
      case RBRACKET:
        return "RBRACK";
      case EQ:
        return "ASSIGN";
      case BANG:
        return "NOT";
      case QUES:
        return "COND";
      case EQEQ:
        return "EQ";
      case LTEQ:
        return "LTE";
      case GTEQ:
        return "GTE";
      case BANGEQ:
        return "NOTEQ";
      case AMPAMP:
        return "ANDAND";
      case BARBAR:
        return "OROR";
      case PLUSPLUS:
        return "INCR";
      case SUBSUB:
        return "DECR";
      case SUB:
        return "MINUS";
      case STAR:
        return "MULT";
      case SLASH:
        return "DIV";
      case AMP:
        return "AND";
      case BAR:
        return "OR";
      case CARET:
        return "XOR";
      case PERCENT:
        return "MOD";
      case SUBEQ:
        return "MINUSEQ";
      case STAREQ:
        return "MULTEQ";
      case SLASHEQ:
        return "DIVEQ";
      case AMPEQ:
        return "ANDEQ";
      case BAREQ:
        return "OREQ";
      case CARETEQ:
        return "XOREQ";
      case PERCENTEQ:
        return "MODEQ";
      case LTLTEQ:
        return "LTLTE";
      case GTGTEQ:
        return "GTGTE";
      case GTGTGTEQ:
        return "GTGTGTE";
      case MONKEYS_AT:
        return "AT";

      // Kinds whose printed label is exactly the enum constant name.
      case EOF:
      case ERROR:
      case ABSTRACT:
      case ASSERT:
      case BOOLEAN:
      case BREAK:
      case BYTE:
      case CASE:
      case CATCH:
      case CHAR:
      case CLASS:
      case CONST:
      case CONTINUE:
      case DEFAULT:
      case DO:
      case DOUBLE:
      case ELSE:
      case ENUM:
      case EXTENDS:
      case FINAL:
      case FINALLY:
      case FLOAT:
      case FOR:
      case GOTO:
      case IF:
      case IMPLEMENTS:
      case IMPORT:
      case INSTANCEOF:
      case INT:
      case INTERFACE:
      case LONG:
      case NATIVE:
      case NEW:
      case PACKAGE:
      case PRIVATE:
      case PROTECTED:
      case PUBLIC:
      case RETURN:
      case SHORT:
      case STATIC:
      case STRICTFP:
      case SUPER:
      case SWITCH:
      case SYNCHRONIZED:
      case THIS:
      case THROW:
      case THROWS:
      case TRANSIENT:
      case TRY:
      case VOID:
      case VOLATILE:
      case WHILE:
      case TRUE:
      case FALSE:
      case NULL:
      case UNDERSCORE:
      case ARROW:
      case COLCOL:
      case LPAREN:
      case RPAREN:
      case LBRACE:
      case RBRACE:
      case SEMI:
      case COMMA:
      case DOT:
      case ELLIPSIS:
      case GT:
      case LT:
      case TILDE:
      case COLON:
      case PLUS:
      case LTLT:
      case GTGT:
      case GTGTGT:
      case PLUSEQ:
      case CUSTOM:
        return token.kind.name();

      default:
        // No dedicated label: fall back to javac's own description of the kind.
        return token.kind.toString();
    }
  }

  /** Returns the slice of {@code input} spanned by {@code token}, from its start to its end. */
  private static String sourceText(String input, Tokens.Token token) {
    return input.substring(token.pos, token.endPos);
  }

  /** Returns the token's string value passed through Guava's Java char escaper. */
  private static String escapedValue(Tokens.Token token) {
    return SourceCodeEscapers.javaCharEscaper().escape(token.stringVal());
  }

  private JavacLexer() {}
}