/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.turbine.parse;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assume.assumeTrue;

import com.google.common.escape.SourceCodeEscapers;
import com.google.common.truth.Expect;
import com.google.turbine.diag.SourceFile;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Tests for {@link StreamLexer}.
 *
 * <p>Each test lexes a snippet with {@link #lex(String)} and compares the rendered token list
 * either with a hand-written expectation or with the list returned by {@code
 * JavacLexer.javacLex} for the same input. All comparisons use {@code inOrder()}: a token stream
 * is a sequence, so the right tokens in the wrong order must fail.
 */
@RunWith(JUnit4.class)
public class LexerTest {

  @Rule public final Expect expect = Expect.create();

  @Test
  public void testSimple() {
    assertThat(lex("\nasd dsa\n")).containsExactly("IDENT(asd)", "IDENT(dsa)", "EOF").inOrder();
  }

  @Test
  public void testOperator() {
    assertThat(lex("\nasd++asd\n"))
        .containsExactly("IDENT(asd)", "INCR", "IDENT(asd)", "EOF")
        .inOrder();
  }

  /** Binary integer literals (the method name is historical). */
  @Test
  public void boolLiteral() {
    lexerComparisonTest("0b0101__01010");
    assertThat(lex("1 + 0b1000100101"))
        .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(0b1000100101)", "EOF")
        .inOrder();
  }

  @Test
  public void octalLiteral() {
    assertThat(lex("1 + 01234567"))
        .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(01234567)", "EOF")
        .inOrder();
  }

  /**
   * Integer literals at the 32-bit boundaries (0x7fffffff, 0x80000000, 0xffffffff), each spelled
   * in hex, octal and binary with underscore separators.
   */
  @Test
  public void testLiteral() {
    assertThat(lex("0L")).containsExactly("LONG_LITERAL(0L)", "EOF").inOrder();
    assertThat(lex("0")).containsExactly("INT_LITERAL(0)", "EOF").inOrder();
    assertThat(lex("0x7fff_ffff")).containsExactly("INT_LITERAL(0x7fff_ffff)", "EOF").inOrder();
    assertThat(lex("0177_7777_7777"))
        .containsExactly("INT_LITERAL(0177_7777_7777)", "EOF")
        .inOrder();
    assertThat(lex("0b0111_1111_1111_1111_1111_1111_1111_1111"))
        .containsExactly("INT_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111)", "EOF")
        .inOrder();
    assertThat(lex("0x8000_0000")).containsExactly("INT_LITERAL(0x8000_0000)", "EOF").inOrder();
    assertThat(lex("0200_0000_0000"))
        .containsExactly("INT_LITERAL(0200_0000_0000)", "EOF")
        .inOrder();
    assertThat(lex("0b1000_0000_0000_0000_0000_0000_0000_0000"))
        .containsExactly("INT_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000)", "EOF")
        .inOrder();
    assertThat(lex("0xffff_ffff")).containsExactly("INT_LITERAL(0xffff_ffff)", "EOF").inOrder();
    assertThat(lex("0377_7777_7777"))
        .containsExactly("INT_LITERAL(0377_7777_7777)", "EOF")
        .inOrder();
    assertThat(lex("0b1111_1111_1111_1111_1111_1111_1111_1111"))
        .containsExactly("INT_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111)", "EOF")
        .inOrder();
  }

  /**
   * Long literals at the 64-bit boundaries, each spelled in hex, octal and binary. A leading
   * minus sign is lexed as a separate {@code MINUS} token, not as part of the literal.
   */
  @Test
  public void testLong() {
    assertThat(lex("1l")).containsExactly("LONG_LITERAL(1l)", "EOF").inOrder();
    assertThat(lex("9223372036854775807L"))
        .containsExactly("LONG_LITERAL(9223372036854775807L)", "EOF")
        .inOrder();
    assertThat(lex("-9223372036854775808L"))
        .containsExactly("MINUS", "LONG_LITERAL(9223372036854775808L)", "EOF")
        .inOrder();
    assertThat(lex("0x7fff_ffff_ffff_ffffL"))
        .containsExactly("LONG_LITERAL(0x7fff_ffff_ffff_ffffL)", "EOF")
        .inOrder();
    assertThat(lex("07_7777_7777_7777_7777_7777L"))
        .containsExactly("LONG_LITERAL(07_7777_7777_7777_7777_7777L)", "EOF")
        .inOrder();
    assertThat(
            lex(
                "0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L"))
        .containsExactly(
            "LONG_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)",
            "EOF")
        .inOrder();
    assertThat(lex("0x8000_0000_0000_0000L"))
        .containsExactly("LONG_LITERAL(0x8000_0000_0000_0000L)", "EOF")
        .inOrder();
    assertThat(lex("010_0000_0000_0000_0000_0000L"))
        .containsExactly("LONG_LITERAL(010_0000_0000_0000_0000_0000L)", "EOF")
        .inOrder();
    assertThat(
            lex(
                "0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L"))
        .containsExactly(
            "LONG_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L)",
            "EOF")
        .inOrder();
    assertThat(lex("0xffff_ffff_ffff_ffffL"))
        .containsExactly("LONG_LITERAL(0xffff_ffff_ffff_ffffL)", "EOF")
        .inOrder();
    assertThat(lex("017_7777_7777_7777_7777_7777L"))
        .containsExactly("LONG_LITERAL(017_7777_7777_7777_7777_7777L)", "EOF")
        .inOrder();
    assertThat(
            lex(
                "0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L"))
        .containsExactly(
            "LONG_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)",
            "EOF")
        .inOrder();
  }

  /** Double literals, including the largest finite and smallest positive double values. */
  @Test
  public void testDoubleLiteral() {
    assertThat(lex("1D")).containsExactly("DOUBLE_LITERAL(1D)", "EOF").inOrder();
    assertThat(lex("123d")).containsExactly("DOUBLE_LITERAL(123d)", "EOF").inOrder();
    assertThat(lex("1.7976931348623157e308"))
        .containsExactly("DOUBLE_LITERAL(1.7976931348623157e308)", "EOF")
        .inOrder();
    assertThat(lex("4.9e-324")).containsExactly("DOUBLE_LITERAL(4.9e-324)", "EOF").inOrder();
  }

  /** Float literals, including the largest finite and smallest positive float values. */
  @Test
  public void testFloatLiteral() {
    assertThat(lex("1F")).containsExactly("FLOAT_LITERAL(1F)", "EOF").inOrder();
    assertThat(lex("123f")).containsExactly("FLOAT_LITERAL(123f)", "EOF").inOrder();
    assertThat(lex("3.4028235e38f"))
        .containsExactly("FLOAT_LITERAL(3.4028235e38f)", "EOF")
        .inOrder();
    assertThat(lex("1.40e-45f")).containsExactly("FLOAT_LITERAL(1.40e-45f)", "EOF").inOrder();
  }

  /** Line and block comments produce no tokens. */
  @Test
  public void testComment() {
    assertThat(lex("a//comment\nb //comment"))
        .containsExactly("IDENT(a)", "IDENT(b)", "EOF")
        .inOrder();
    assertThat(lex("a/*comment*/\nb /*comment**/c/*asd*/"))
        .containsExactly("IDENT(a)", "IDENT(b)", "IDENT(c)", "EOF")
        .inOrder();
  }

  @Test
  public void testStringLiteral() {
    assertThat(lex("\"asd\" \"\\n\""))
        .containsExactly("STRING_LITERAL(asd)", "STRING_LITERAL(\\n)", "EOF")
        .inOrder();
  }

  @Test
  public void charLiteral() {
    assertThat(lex("'a' '\\t' '\\r'"))
        .containsExactly("CHAR_LITERAL(a)", "CHAR_LITERAL(\\t)", "CHAR_LITERAL(\\r)", "EOF")
        .inOrder();
  }

  @Test
  public void negativeInt() {
    assertThat(lex("(int)-1"))
        .containsExactly("LPAREN", "INT", "RPAREN", "MINUS", "INT_LITERAL(1)", "EOF")
        .inOrder();
  }

  @Test
  public void importStmt() {
    assertThat(lex("import com.google.Foo;"))
        .containsExactly(
            "IMPORT", "IDENT(com)", "DOT", "IDENT(google)", "DOT", "IDENT(Foo)", "SEMI", "EOF")
        .inOrder();
  }

  @Test
  public void annotation() {
    assertThat(lex("@GwtCompatible(serializable = true, emulated = true)"))
        .containsExactly(
            "AT",
            "IDENT(GwtCompatible)",
            "LPAREN",
            "IDENT(serializable)",
            "ASSIGN",
            "TRUE",
            "COMMA",
            "IDENT(emulated)",
            "ASSIGN",
            "TRUE",
            "RPAREN",
            "EOF")
        .inOrder();
  }

  @Test
  public void operators() {
    assertThat(
            lex(
                "= > < ! ~ ? : ->\n"
                    + "== >= <= != && || ++ --\n"
                    + "+ - * / & | ^ % << >> >>>\n"
                    + "+= -= *= /= &= |= ^= %= <<= >>= >>>="))
        .containsExactly(
            "ASSIGN", "GT", "LT", "NOT", "TILDE", "COND", "COLON", "ARROW", "EQ", "GTE", "LTE",
            "NOTEQ", "ANDAND", "OROR", "INCR", "DECR", "PLUS", "MINUS", "MULT", "DIV", "AND", "OR",
            "XOR", "MOD", "LTLT", "GTGT", "GTGTGT", "PLUSEQ", "MINUSEQ", "MULTEQ", "DIVEQ", "ANDEQ",
            "OREQ", "XOREQ", "MODEQ", "LTLTE", "GTGTE", "GTGTGTE", "EOF")
        .inOrder();
  }

  @Test
  public void keywords() {
    assertThat(
            lex(
                " abstract continue for new switch\n"
                    + " assert default if package synchronized\n"
                    + " boolean do goto private this\n"
                    + " break double implements protected throw\n"
                    + " byte else import public throws\n"
                    + " case enum instanceof return transient\n"
                    + " catch extends int short try\n"
                    + " char final interface static void\n"
                    + " class finally long strictfp volatile\n"
                    + " const float native super while\n"
                    + "= > < ! ~ ? : ->\n"))
        .containsExactly(
            "ABSTRACT",
            "CONTINUE",
            "FOR",
            "NEW",
            "SWITCH",
            "ASSERT",
            "DEFAULT",
            "IF",
            "PACKAGE",
            "SYNCHRONIZED",
            "BOOLEAN",
            "DO",
            "GOTO",
            "PRIVATE",
            "THIS",
            "BREAK",
            "DOUBLE",
            "IMPLEMENTS",
            "PROTECTED",
            "THROW",
            "BYTE",
            "ELSE",
            "IMPORT",
            "PUBLIC",
            "THROWS",
            "CASE",
            "ENUM",
            "INSTANCEOF",
            "RETURN",
            "TRANSIENT",
            "CATCH",
            "EXTENDS",
            "INT",
            "SHORT",
            "TRY",
            "CHAR",
            "FINAL",
            "INTERFACE",
            "STATIC",
            "VOID",
            "CLASS",
            "FINALLY",
            "LONG",
            "STRICTFP",
            "VOLATILE",
            "CONST",
            "FLOAT",
            "NATIVE",
            "SUPER",
            "WHILE",
            "ASSIGN",
            "GT",
            "LT",
            "NOT",
            "TILDE",
            "COND",
            "COLON",
            "ARROW",
            "EOF")
        .inOrder();
  }

  @Test
  public void hexFloat() {
    lexerComparisonTest("0x1.0p31");
    lexerComparisonTest("0x1p31");
  }

  @Test
  public void zeroFloat() {
    lexerComparisonTest("0f");
  }

  @Test
  public void escape() {
    lexerComparisonTest("'\\b'");
    lexerComparisonTest("'\\0'");
    lexerComparisonTest("'\\01'");
    lexerComparisonTest("'\\001'");
  }

  @Test
  public void floatLiteral() {
    lexerComparisonTest(".123321f");
    lexerComparisonTest(".123321F");
    lexerComparisonTest(".123321d");
    lexerComparisonTest(".123321D");
    lexerComparisonTest("0.0e+1f");
    lexerComparisonTest("0.0e-1f");
    lexerComparisonTest(".123321");
  }

  @Test
  public void digitsUnderscore() {
    lexerComparisonTest("123__123______3");
  }

  @Test
  public void moreOperators() {
    lexerComparisonTest("* / %");
  }

  @Test
  public void unusualKeywords() {
    lexerComparisonTest("const goto assert");
  }

  @Test
  public void specialCharLiteral() {
    lexerComparisonTest("'\\013'");
  }

  @Test
  public void stringEscape() {
    lexerComparisonTest("\"asd\\\"dsa\"");
  }

  @Test
  public void blockCommentEndingSlash() {
    lexerComparisonTest("foo /*/*/ bar");
  }

  /** An identifier containing a supplementary code point (a surrogate pair). */
  @Test
  public void unicode() {
    lexerComparisonTest("import pkg\uD800\uDC00.test;");
  }

  /** A unicode escape for a closing brace directly after a javadoc comment. */
  @Test
  public void javadocUnicodeEscape() {
    lexerComparisonTest("class {/***/\\u007D;");
  }

  /**
   * Asserts that {@link #lex(String)} yields, in the same order, exactly the tokens that {@code
   * JavacLexer.javacLex} returns for the same input.
   *
   * @param s the source text to lex with both lexers
   */
  private void lexerComparisonTest(String s) {
    // NOTE(review): inOrder() assumes javacLex returns tokens in source order - confirm.
    assertThat(lex(s)).containsExactlyElementsIn(JavacLexer.javacLex(s)).inOrder();
  }

  /**
   * Lexes {@code input} with a {@link StreamLexer} and renders every token, up to and including
   * {@code EOF}, as a string.
   *
   * <p>Identifiers and numeric literals render as {@code NAME(text)}. Char and string literals
   * render as {@code NAME(text)} with the text passed through {@link
   * SourceCodeEscapers#javaCharEscaper()}. Every other token renders as its enum name.
   *
   * @param input the source text to lex
   * @return the rendered tokens in the order the lexer produced them; the last is {@code "EOF"}
   */
  public static List<String> lex(String input) {
    Lexer lexer = new StreamLexer(new UnicodeEscapePreprocessor(new SourceFile(null, input)));
    List<String> tokens = new ArrayList<>();
    Token token;
    do {
      token = lexer.next();
      // Just check that javadoc handling doesn't crash
      String unused = lexer.javadoc();
      String tokenString;
      switch (token) {
        case IDENT:
        case INT_LITERAL:
        case LONG_LITERAL:
        case FLOAT_LITERAL:
        case DOUBLE_LITERAL:
          tokenString = String.format("%s(%s)", token.name(), lexer.stringValue());
          break;
        case CHAR_LITERAL:
        case STRING_LITERAL:
          // Escape the value so control characters are visible in the expectations.
          tokenString =
              String.format(
                  "%s(%s)",
                  token.name(), SourceCodeEscapers.javaCharEscaper().escape(lexer.stringValue()));
          break;
        default:
          tokenString = token.name();
          break;
      }
      tokens.add(tokenString);
    } while (token != Token.EOF);
    return tokens;
  }

  /**
   * Checks {@code StreamLexer.stripIndent} against {@code String#stripIndent}, looked up
   * reflectively. The test is skipped when the runtime feature version is below 13.
   */
  @Test
  public void stripIndent() throws Exception {
    assumeTrue(Runtime.version().feature() >= 13);
    // NOTE(review): some entries are identical; whitespace runs inside these literals may have
    // been collapsed before this file was captured - confirm against the upstream source.
    String[] inputs = {
      "",
      "hello",
      "hello\n",
      "\nhello",
      "\n hello\n world",
      "\n hello\n world\n ",
      "\n hello\n world\n",
      "\n hello\n world\n ",
      "\n hello\nworld",
      "\n hello\n \nworld\n ",
    };
    Method stripIndent = String.class.getMethod("stripIndent");
    for (String input : inputs) {
      expect.that(StreamLexer.stripIndent(input)).isEqualTo(stripIndent.invoke(input));
    }
  }
}