• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.turbine.parse;
18 
19 import com.google.common.base.Function;
20 import com.google.common.collect.Lists;
21 import com.google.common.escape.SourceCodeEscapers;
22 import com.sun.tools.javac.parser.Scanner;
23 import com.sun.tools.javac.parser.ScannerFactory;
24 import com.sun.tools.javac.parser.Tokens;
25 import com.sun.tools.javac.util.Context;
26 import java.util.ArrayList;
27 import java.util.List;
28 
29 /** A javac-based reference lexer. */
30 public final class JavacLexer {
31 
javacLex(final String input)32   static List<String> javacLex(final String input) {
33     Context context = new Context();
34     Scanner scanner =
35         ScannerFactory.instance(context).newScanner(input, /*keepDocComments=*/ false);
36     List<Tokens.Token> tokens = new ArrayList<>();
37     do {
38       scanner.nextToken();
39       tokens.add(scanner.token());
40     } while (scanner.token().kind != Tokens.TokenKind.EOF);
41     return Lists.transform(
42         tokens,
43         new Function<Tokens.Token, String>() {
44           @Override
45           public String apply(Tokens.Token token) {
46             return printToken(input, token);
47           }
48         });
49   }
50 
51   private static String printToken(String input, Tokens.Token token) {
52     switch (token.kind) {
53       case IDENTIFIER:
54         return String.format("IDENT(%s)", token.name());
55       case EOF:
56         return "EOF";
57       case ERROR:
58         return "ERROR";
59       case ABSTRACT:
60         return "ABSTRACT";
61       case ASSERT:
62         return "ASSERT";
63       case BOOLEAN:
64         return "BOOLEAN";
65       case BREAK:
66         return "BREAK";
67       case BYTE:
68         return "BYTE";
69       case CASE:
70         return "CASE";
71       case CATCH:
72         return "CATCH";
73       case CHAR:
74         return "CHAR";
75       case CLASS:
76         return "CLASS";
77       case CONST:
78         return "CONST";
79       case CONTINUE:
80         return "CONTINUE";
81       case DEFAULT:
82         return "DEFAULT";
83       case DO:
84         return "DO";
85       case DOUBLE:
86         return "DOUBLE";
87       case ELSE:
88         return "ELSE";
89       case ENUM:
90         return "ENUM";
91       case EXTENDS:
92         return "EXTENDS";
93       case FINAL:
94         return "FINAL";
95       case FINALLY:
96         return "FINALLY";
97       case FLOAT:
98         return "FLOAT";
99       case FOR:
100         return "FOR";
101       case GOTO:
102         return "GOTO";
103       case IF:
104         return "IF";
105       case IMPLEMENTS:
106         return "IMPLEMENTS";
107       case IMPORT:
108         return "IMPORT";
109       case INSTANCEOF:
110         return "INSTANCEOF";
111       case INT:
112         return "INT";
113       case INTERFACE:
114         return "INTERFACE";
115       case LONG:
116         return "LONG";
117       case NATIVE:
118         return "NATIVE";
119       case NEW:
120         return "NEW";
121       case PACKAGE:
122         return "PACKAGE";
123       case PRIVATE:
124         return "PRIVATE";
125       case PROTECTED:
126         return "PROTECTED";
127       case PUBLIC:
128         return "PUBLIC";
129       case RETURN:
130         return "RETURN";
131       case SHORT:
132         return "SHORT";
133       case STATIC:
134         return "STATIC";
135       case STRICTFP:
136         return "STRICTFP";
137       case SUPER:
138         return "SUPER";
139       case SWITCH:
140         return "SWITCH";
141       case SYNCHRONIZED:
142         return "SYNCHRONIZED";
143       case THIS:
144         return "THIS";
145       case THROW:
146         return "THROW";
147       case THROWS:
148         return "THROWS";
149       case TRANSIENT:
150         return "TRANSIENT";
151       case TRY:
152         return "TRY";
153       case VOID:
154         return "VOID";
155       case VOLATILE:
156         return "VOLATILE";
157       case WHILE:
158         return "WHILE";
159       case TRUE:
160         return "TRUE";
161       case FALSE:
162         return "FALSE";
163       case NULL:
164         return "NULL";
165       case UNDERSCORE:
166         return "UNDERSCORE";
167       case ARROW:
168         return "ARROW";
169       case COLCOL:
170         return "COLCOL";
171       case LPAREN:
172         return "LPAREN";
173       case RPAREN:
174         return "RPAREN";
175       case LBRACE:
176         return "LBRACE";
177       case RBRACE:
178         return "RBRACE";
179       case LBRACKET:
180         return "LBRACK";
181       case RBRACKET:
182         return "RBRACK";
183       case SEMI:
184         return "SEMI";
185       case COMMA:
186         return "COMMA";
187       case DOT:
188         return "DOT";
189       case ELLIPSIS:
190         return "ELLIPSIS";
191       case EQ:
192         return "ASSIGN";
193       case GT:
194         return "GT";
195       case LT:
196         return "LT";
197       case BANG:
198         return "NOT";
199       case TILDE:
200         return "TILDE";
201       case QUES:
202         return "COND";
203       case COLON:
204         return "COLON";
205       case EQEQ:
206         return "EQ";
207       case LTEQ:
208         return "LTE";
209       case GTEQ:
210         return "GTE";
211       case BANGEQ:
212         return "NOTEQ";
213       case AMPAMP:
214         return "ANDAND";
215       case BARBAR:
216         return "OROR";
217       case PLUSPLUS:
218         return "INCR";
219       case SUBSUB:
220         return "DECR";
221       case PLUS:
222         return "PLUS";
223       case SUB:
224         return "MINUS";
225       case STAR:
226         return "MULT";
227       case SLASH:
228         return "DIV";
229       case AMP:
230         return "AND";
231       case BAR:
232         return "OR";
233       case CARET:
234         return "XOR";
235       case PERCENT:
236         return "MOD";
237       case LTLT:
238         return "LTLT";
239       case GTGT:
240         return "GTGT";
241       case GTGTGT:
242         return "GTGTGT";
243       case PLUSEQ:
244         return "PLUSEQ";
245       case SUBEQ:
246         return "MINUSEQ";
247       case STAREQ:
248         return "MULTEQ";
249       case SLASHEQ:
250         return "DIVEQ";
251       case AMPEQ:
252         return "ANDEQ";
253       case BAREQ:
254         return "OREQ";
255       case CARETEQ:
256         return "XOREQ";
257       case PERCENTEQ:
258         return "MODEQ";
259       case LTLTEQ:
260         return "LTLTE";
261       case GTGTEQ:
262         return "GTGTE";
263       case GTGTGTEQ:
264         return "GTGTGTE";
265       case MONKEYS_AT:
266         return "AT";
267       case CUSTOM:
268         return "CUSTOM";
269       case STRINGLITERAL:
270         return String.format(
271             "STRING_LITERAL(%s)", SourceCodeEscapers.javaCharEscaper().escape(token.stringVal()));
272       case INTLITERAL:
273         return String.format("INT_LITERAL(%s)", input.substring(token.pos, token.endPos));
274       case LONGLITERAL:
275         return String.format("LONG_LITERAL(%s)", input.substring(token.pos, token.endPos));
276       case FLOATLITERAL:
277         return String.format("FLOAT_LITERAL(%s)", input.substring(token.pos, token.endPos));
278       case DOUBLELITERAL:
279         return String.format("DOUBLE_LITERAL(%s)", input.substring(token.pos, token.endPos));
280       case CHARLITERAL:
281         return String.format(
282             "CHAR_LITERAL(%s)", SourceCodeEscapers.javaCharEscaper().escape(token.stringVal()));
283     }
284     return token.kind.toString();
285   }
286 
287   private JavacLexer() {}
288 }
289