/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 package com.google.turbine.parse;
18 
19 import static com.google.common.truth.Truth.assertThat;
20 import static org.junit.Assume.assumeTrue;
21 
22 import com.google.common.escape.SourceCodeEscapers;
23 import com.google.common.truth.Expect;
24 import com.google.turbine.diag.SourceFile;
25 import java.lang.reflect.Method;
26 import java.util.ArrayList;
27 import java.util.List;
28 import org.junit.Rule;
29 import org.junit.Test;
30 import org.junit.runner.RunWith;
31 import org.junit.runners.JUnit4;
32 
33 @RunWith(JUnit4.class)
34 public class LexerTest {
35 
36   @Rule public final Expect expect = Expect.create();
37 
38   @Test
testSimple()39   public void testSimple() {
40     assertThat(lex("\nasd dsa\n")).containsExactly("IDENT(asd)", "IDENT(dsa)", "EOF");
41   }
42 
43   @Test
testOperator()44   public void testOperator() {
45     assertThat(lex("\nasd++asd\n")).containsExactly("IDENT(asd)", "INCR", "IDENT(asd)", "EOF");
46   }
47 
48   @Test
boolLiteral()49   public void boolLiteral() {
50     lexerComparisonTest("0b0101__01010");
51     assertThat(lex("1 + 0b1000100101"))
52         .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(0b1000100101)", "EOF");
53   }
54 
55   @Test
octalLiteral()56   public void octalLiteral() {
57     assertThat(lex("1 + 01234567"))
58         .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(01234567)", "EOF");
59   }
60 
61   @Test
testLiteral()62   public void testLiteral() {
63     assertThat(lex("0L")).containsExactly("LONG_LITERAL(0L)", "EOF");
64     assertThat(lex("0")).containsExactly("INT_LITERAL(0)", "EOF");
65     assertThat(lex("0x7fff_ffff")).containsExactly("INT_LITERAL(0x7fff_ffff)", "EOF");
66     assertThat(lex("0177_7777_7777")).containsExactly("INT_LITERAL(0177_7777_7777)", "EOF");
67     assertThat(lex("0b0111_1111_1111_1111_1111_1111_1111_1111"))
68         .containsExactly("INT_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111)", "EOF");
69     assertThat(lex("0x8000_0000")).containsExactly("INT_LITERAL(0x8000_0000)", "EOF");
70     assertThat(lex("0200_0000_0000")).containsExactly("INT_LITERAL(0200_0000_0000)", "EOF");
71     assertThat(lex("0b1000_0000_0000_0000_0000_0000_0000_0000"))
72         .containsExactly("INT_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000)", "EOF");
73     assertThat(lex("0xffff_ffff")).containsExactly("INT_LITERAL(0xffff_ffff)", "EOF");
74     assertThat(lex("0377_7777_7777")).containsExactly("INT_LITERAL(0377_7777_7777)", "EOF");
75     assertThat(lex("0b1111_1111_1111_1111_1111_1111_1111_1111"))
76         .containsExactly("INT_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111)", "EOF");
77   }
78 
79   @Test
testLong()80   public void testLong() {
81     assertThat(lex("1l")).containsExactly("LONG_LITERAL(1l)", "EOF");
82     assertThat(lex("9223372036854775807L"))
83         .containsExactly("LONG_LITERAL(9223372036854775807L)", "EOF");
84     assertThat(lex("-9223372036854775808L"))
85         .containsExactly("MINUS", "LONG_LITERAL(9223372036854775808L)", "EOF");
86     assertThat(lex("0x7fff_ffff_ffff_ffffL"))
87         .containsExactly("LONG_LITERAL(0x7fff_ffff_ffff_ffffL)", "EOF");
88     assertThat(lex("07_7777_7777_7777_7777_7777L"))
89         .containsExactly("LONG_LITERAL(07_7777_7777_7777_7777_7777L)", "EOF");
90     assertThat(
91             lex(
92                 "0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L"))
93         .containsExactly(
94             "LONG_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)",
95             "EOF");
96     assertThat(lex("0x8000_0000_0000_0000L"))
97         .containsExactly("LONG_LITERAL(0x8000_0000_0000_0000L)", "EOF");
98     assertThat(lex("010_0000_0000_0000_0000_0000L"))
99         .containsExactly("LONG_LITERAL(010_0000_0000_0000_0000_0000L)", "EOF");
100     assertThat(
101             lex(
102                 "0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L"))
103         .containsExactly(
104             "LONG_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L)",
105             "EOF");
106     assertThat(lex("0xffff_ffff_ffff_ffffL"))
107         .containsExactly("LONG_LITERAL(0xffff_ffff_ffff_ffffL)", "EOF");
108     assertThat(lex("017_7777_7777_7777_7777_7777L"))
109         .containsExactly("LONG_LITERAL(017_7777_7777_7777_7777_7777L)", "EOF");
110     assertThat(
111             lex(
112                 "0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L"))
113         .containsExactly(
114             "LONG_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)",
115             "EOF");
116   }
117 
118   @Test
testDoubleLiteral()119   public void testDoubleLiteral() {
120     assertThat(lex("1D")).containsExactly("DOUBLE_LITERAL(1D)", "EOF");
121     assertThat(lex("123d")).containsExactly("DOUBLE_LITERAL(123d)", "EOF");
122     assertThat(lex("1.7976931348623157e308"))
123         .containsExactly("DOUBLE_LITERAL(1.7976931348623157e308)", "EOF");
124     assertThat(lex("4.9e-324")).containsExactly("DOUBLE_LITERAL(4.9e-324)", "EOF");
125   }
126 
127   @Test
testFloatLiteral()128   public void testFloatLiteral() {
129     assertThat(lex("1F")).containsExactly("FLOAT_LITERAL(1F)", "EOF");
130     assertThat(lex("123f")).containsExactly("FLOAT_LITERAL(123f)", "EOF");
131     assertThat(lex("3.4028235e38f")).containsExactly("FLOAT_LITERAL(3.4028235e38f)", "EOF");
132     assertThat(lex("1.40e-45f")).containsExactly("FLOAT_LITERAL(1.40e-45f)", "EOF");
133   }
134 
135   @Test
testComment()136   public void testComment() {
137     assertThat(lex("a//comment\nb //comment")).containsExactly("IDENT(a)", "IDENT(b)", "EOF");
138     assertThat(lex("a/*comment*/\nb /*comment**/c/*asd*/"))
139         .containsExactly("IDENT(a)", "IDENT(b)", "IDENT(c)", "EOF");
140   }
141 
142   @Test
testStringLiteral()143   public void testStringLiteral() {
144     assertThat(lex("\"asd\" \"\\n\""))
145         .containsExactly("STRING_LITERAL(asd)", "STRING_LITERAL(\\n)", "EOF");
146   }
147 
148   @Test
charLiteral()149   public void charLiteral() {
150     assertThat(lex("'a' '\\t' '\\r'"))
151         .containsExactly("CHAR_LITERAL(a)", "CHAR_LITERAL(\\t)", "CHAR_LITERAL(\\r)", "EOF");
152   }
153 
154   @Test
negativeInt()155   public void negativeInt() {
156     assertThat(lex("(int)-1"))
157         .containsExactly("LPAREN", "INT", "RPAREN", "MINUS", "INT_LITERAL(1)", "EOF");
158   }
159 
160   @Test
importStmt()161   public void importStmt() {
162     assertThat(lex("import com.google.Foo;"))
163         .containsExactly(
164             "IMPORT", "IDENT(com)", "DOT", "IDENT(google)", "DOT", "IDENT(Foo)", "SEMI", "EOF");
165   }
166 
167   @Test
annotation()168   public void annotation() {
169     assertThat(lex("@GwtCompatible(serializable = true, emulated = true)"))
170         .containsExactly(
171             "AT",
172             "IDENT(GwtCompatible)",
173             "LPAREN",
174             "IDENT(serializable)",
175             "ASSIGN",
176             "TRUE",
177             "COMMA",
178             "IDENT(emulated)",
179             "ASSIGN",
180             "TRUE",
181             "RPAREN",
182             "EOF");
183   }
184 
185   @Test
operators()186   public void operators() {
187     assertThat(
188             lex(
189                 "=   >   <   !   ~   ?   :   ->\n"
190                     + "==  >=  <=  !=  &&  ||  ++  --\n"
191                     + "+   -   *   /   &   |   ^   %   <<   >>   >>>\n"
192                     + "+=  -=  *=  /=  &=  |=  ^=  %=  <<=  >>=  >>>="))
193         .containsExactly(
194             "ASSIGN", "GT", "LT", "NOT", "TILDE", "COND", "COLON", "ARROW", "EQ", "GTE", "LTE",
195             "NOTEQ", "ANDAND", "OROR", "INCR", "DECR", "PLUS", "MINUS", "MULT", "DIV", "AND", "OR",
196             "XOR", "MOD", "LTLT", "GTGT", "GTGTGT", "PLUSEQ", "MINUSEQ", "MULTEQ", "DIVEQ", "ANDEQ",
197             "OREQ", "XOREQ", "MODEQ", "LTLTE", "GTGTE", "GTGTGTE", "EOF");
198   }
199 
200   @Test
keywords()201   public void keywords() {
202     assertThat(
203             lex(
204                 "    abstract   continue   for          new         switch\n"
205                     + "    assert     default    if           package     synchronized\n"
206                     + "    boolean    do         goto         private     this\n"
207                     + "    break      double     implements   protected   throw\n"
208                     + "    byte       else       import       public      throws\n"
209                     + "    case       enum       instanceof   return      transient\n"
210                     + "    catch      extends    int          short       try\n"
211                     + "    char       final      interface    static      void\n"
212                     + "    class      finally    long         strictfp    volatile\n"
213                     + "    const      float      native       super       while\n"
214                     + "=   >   <   !   ~   ?   :   ->\n"))
215         .containsExactly(
216             "ABSTRACT",
217             "CONTINUE",
218             "FOR",
219             "NEW",
220             "SWITCH",
221             "ASSERT",
222             "DEFAULT",
223             "IF",
224             "PACKAGE",
225             "SYNCHRONIZED",
226             "BOOLEAN",
227             "DO",
228             "GOTO",
229             "PRIVATE",
230             "THIS",
231             "BREAK",
232             "DOUBLE",
233             "IMPLEMENTS",
234             "PROTECTED",
235             "THROW",
236             "BYTE",
237             "ELSE",
238             "IMPORT",
239             "PUBLIC",
240             "THROWS",
241             "CASE",
242             "ENUM",
243             "INSTANCEOF",
244             "RETURN",
245             "TRANSIENT",
246             "CATCH",
247             "EXTENDS",
248             "INT",
249             "SHORT",
250             "TRY",
251             "CHAR",
252             "FINAL",
253             "INTERFACE",
254             "STATIC",
255             "VOID",
256             "CLASS",
257             "FINALLY",
258             "LONG",
259             "STRICTFP",
260             "VOLATILE",
261             "CONST",
262             "FLOAT",
263             "NATIVE",
264             "SUPER",
265             "WHILE",
266             "ASSIGN",
267             "GT",
268             "LT",
269             "NOT",
270             "TILDE",
271             "COND",
272             "COLON",
273             "ARROW",
274             "EOF");
275   }
276 
277   @Test
hexFloat()278   public void hexFloat() {
279     lexerComparisonTest("0x1.0p31");
280     lexerComparisonTest("0x1p31");
281   }
282 
283   @Test
zeroFloat()284   public void zeroFloat() {
285     lexerComparisonTest("0f");
286   }
287 
288   @Test
escape()289   public void escape() {
290     lexerComparisonTest("'\\b'");
291     lexerComparisonTest("'\\0'");
292     lexerComparisonTest("'\\01'");
293     lexerComparisonTest("'\\001'");
294   }
295 
296   @Test
floatLiteral()297   public void floatLiteral() {
298     lexerComparisonTest(".123321f");
299     lexerComparisonTest(".123321F");
300     lexerComparisonTest(".123321d");
301     lexerComparisonTest(".123321D");
302     lexerComparisonTest("0.0e+1f");
303     lexerComparisonTest("0.0e-1f");
304     lexerComparisonTest(".123321");
305   }
306 
307   @Test
digitsUnderscore()308   public void digitsUnderscore() {
309     lexerComparisonTest("123__123______3");
310   }
311 
312   @Test
moreOperators()313   public void moreOperators() {
314     lexerComparisonTest("* / %");
315   }
316 
317   @Test
unusualKeywords()318   public void unusualKeywords() {
319     lexerComparisonTest("const goto assert");
320   }
321 
322   @Test
specialCharLiteral()323   public void specialCharLiteral() {
324     lexerComparisonTest("'\\013'");
325   }
326 
327   @Test
stringEscape()328   public void stringEscape() {
329     lexerComparisonTest("\"asd\\\"dsa\"");
330   }
331 
332   @Test
blockCommentEndingSlash()333   public void blockCommentEndingSlash() {
334     lexerComparisonTest("foo /*/*/ bar");
335   }
336 
337   @Test
unicode()338   public void unicode() {
339     lexerComparisonTest("import pkg\uD800\uDC00.test;");
340   }
341 
342   @Test
javadocUnicodeEscape()343   public void javadocUnicodeEscape() {
344     lexerComparisonTest("class {/***/\\u007D;");
345   }
346 
lexerComparisonTest(String s)347   private void lexerComparisonTest(String s) {
348     assertThat(lex(s)).containsExactlyElementsIn(JavacLexer.javacLex(s));
349   }
350 
lex(String input)351   public static List<String> lex(String input) {
352     Lexer lexer = new StreamLexer(new UnicodeEscapePreprocessor(new SourceFile(null, input)));
353     List<String> tokens = new ArrayList<>();
354     Token token;
355     do {
356       token = lexer.next();
357       // Just check that javadoc handling doesn't crash
358       String unused = lexer.javadoc();
359       String tokenString;
360       switch (token) {
361         case IDENT:
362         case INT_LITERAL:
363         case LONG_LITERAL:
364         case FLOAT_LITERAL:
365         case DOUBLE_LITERAL:
366           tokenString = String.format("%s(%s)", token.name(), lexer.stringValue());
367           break;
368         case CHAR_LITERAL:
369         case STRING_LITERAL:
370           tokenString =
371               String.format(
372                   "%s(%s)",
373                   token.name(), SourceCodeEscapers.javaCharEscaper().escape(lexer.stringValue()));
374           break;
375         default:
376           tokenString = token.name();
377           break;
378       }
379       tokens.add(tokenString);
380     } while (token != Token.EOF);
381     return tokens;
382   }
383 
384   @Test
stripIndent()385   public void stripIndent() throws Exception {
386     assumeTrue(Runtime.version().feature() >= 13);
387     String[] inputs = {
388       "",
389       "hello",
390       "hello\n",
391       "\nhello",
392       "\n    hello\n    world",
393       "\n    hello\n    world\n    ",
394       "\n    hello\n    world\n",
395       "\n    hello\n     world\n     ",
396       "\n    hello\nworld",
397       "\n    hello\n     \nworld\n     ",
398     };
399     Method stripIndent = String.class.getMethod("stripIndent");
400     for (String input : inputs) {
401       expect.that(StreamLexer.stripIndent(input)).isEqualTo(stripIndent.invoke(input));
402     }
403   }
404 }
405