1 /* 2 * Copyright 2016 Google Inc. All Rights Reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.turbine.parse; 18 19 import static com.google.common.base.Verify.verify; 20 import static com.google.common.collect.ImmutableList.toImmutableList; 21 import static com.google.turbine.parse.UnicodeEscapePreprocessor.ASCII_SUB; 22 import static java.lang.Math.min; 23 24 import com.google.common.collect.ImmutableList; 25 import com.google.turbine.diag.SourceFile; 26 import com.google.turbine.diag.TurbineError; 27 import com.google.turbine.diag.TurbineError.ErrorKind; 28 import org.jspecify.nullness.Nullable; 29 30 /** A {@link Lexer} that streams input from a {@link UnicodeEscapePreprocessor}. */ 31 public class StreamLexer implements Lexer { 32 33 private final UnicodeEscapePreprocessor reader; 34 35 /** The current input character. */ 36 private int ch; 37 38 /** The start position of the current token. */ 39 private int position; 40 41 /** The start position of the current numeric literal or identifier token. */ 42 private int readFrom; 43 44 /** The value of the current string or character literal token. */ 45 private String value = null; 46 47 /** A saved javadoc comment. */ 48 private String javadoc = null; 49 StreamLexer(UnicodeEscapePreprocessor reader)50 public StreamLexer(UnicodeEscapePreprocessor reader) { 51 this.reader = reader; 52 eat(); 53 } 54 55 /** Records the value of a literal. */ saveValue(String value)56 private void saveValue(String value) { 57 this.value = value; 58 } 59 60 /** Records the start position of a literal. */ readFrom()61 private void readFrom() { 62 value = null; 63 readFrom = reader.position(); 64 } 65 66 /** Consumes an input character. */ eat()67 private void eat() { 68 ch = reader.next(); 69 } 70 71 @Override javadoc()72 public @Nullable String javadoc() { 73 String result = javadoc; 74 javadoc = null; 75 if (result == null) { 76 return null; 77 } 78 verify(result.endsWith("*/"), result); 79 return result.substring(0, result.length() - "*/".length()); 80 } 81 82 @Override stringValue()83 public String stringValue() { 84 if (value != null) { 85 return value; 86 } 87 return reader.readString(readFrom, reader.position()); 88 } 89 90 @Override position()91 public int position() { 92 return position; 93 } 94 95 @Override source()96 public SourceFile source() { 97 return reader.source(); 98 } 99 100 @Override next()101 public Token next() { 102 OUTER: 103 while (true) { 104 position = reader.position(); 105 switch (ch) { 106 case '\r': 107 case '\n': 108 case ' ': 109 case '\t': 110 case '\f': 111 eat(); 112 continue OUTER; 113 114 case '/': 115 { 116 eat(); 117 switch (ch) { 118 case '/': 119 while (true) { 120 eat(); 121 switch (ch) { 122 case '\n': 123 case '\r': 124 eat(); 125 continue OUTER; 126 case ASCII_SUB: 127 if (reader.done()) { 128 return Token.EOF; 129 } 130 eat(); 131 break; 132 default: // fall out 133 } 134 } 135 case '*': 136 eat(); 137 boolean sawStar = false; 138 boolean isJavadoc = false; 139 if (ch == '*') { 140 eat(); 141 // handle empty non-javadoc comments: `/**/` 142 if (ch == '/') { 143 eat(); 144 continue OUTER; 145 } 146 isJavadoc = true; 147 readFrom(); 148 } 149 while (true) { 150 switch (ch) { 151 case '*': 152 eat(); 153 sawStar = true; 154 break; 155 case '/': 156 eat(); 157 if (sawStar) { 158 if (isJavadoc) { 159 // Save the comment, excluding the leading `/**` and including 160 // the trailing `/*`. The comment is trimmed and normalized later. 161 javadoc = stringValue(); 162 } 163 continue OUTER; 164 } 165 sawStar = false; 166 break; 167 case ASCII_SUB: 168 if (reader.done()) { 169 throw TurbineError.format( 170 reader.source(), position, ErrorKind.UNCLOSED_COMMENT); 171 } 172 eat(); 173 sawStar = false; 174 break; 175 default: 176 eat(); 177 sawStar = false; 178 break; 179 } 180 } 181 default: 182 if (ch == '=') { 183 eat(); 184 return Token.DIVEQ; 185 } 186 return Token.DIV; 187 } 188 } 189 190 case 'a': 191 case 'b': 192 case 'c': 193 case 'd': 194 case 'e': 195 case 'f': 196 case 'g': 197 case 'h': 198 case 'i': 199 case 'j': 200 case 'k': 201 case 'l': 202 case 'm': 203 case 'n': 204 case 'o': 205 case 'p': 206 case 'q': 207 case 'r': 208 case 's': 209 case 't': 210 case 'u': 211 case 'v': 212 case 'w': 213 case 'x': 214 case 'y': 215 case 'z': 216 case 'A': 217 case 'B': 218 case 'C': 219 case 'D': 220 case 'E': 221 case 'F': 222 case 'G': 223 case 'H': 224 case 'I': 225 case 'J': 226 case 'K': 227 case 'L': 228 case 'M': 229 case 'N': 230 case 'O': 231 case 'P': 232 case 'Q': 233 case 'R': 234 case 'S': 235 case 'T': 236 case 'U': 237 case 'V': 238 case 'W': 239 case 'X': 240 case 'Y': 241 case 'Z': 242 case '_': 243 case '$': 244 return identifier(); 245 246 case ASCII_SUB: 247 if (!reader.done()) { 248 throw error(ErrorKind.UNEXPECTED_EOF); 249 } 250 return Token.EOF; 251 252 case '-': 253 case '=': 254 case '>': 255 case '<': 256 case '!': 257 case '~': 258 case '+': 259 case '?': 260 case ':': 261 case '*': 262 case '&': 263 case '|': 264 case '^': 265 case '%': 266 return operator(); 267 case '(': 268 eat(); 269 return Token.LPAREN; 270 case ')': 271 eat(); 272 return Token.RPAREN; 273 case '{': 274 eat(); 275 return Token.LBRACE; 276 case '}': 277 eat(); 278 return Token.RBRACE; 279 case '[': 280 eat(); 281 return Token.LBRACK; 282 case ']': 283 eat(); 284 return Token.RBRACK; 285 case ';': 286 eat(); 287 return Token.SEMI; 288 case ',': 289 eat(); 290 return Token.COMMA; 291 case '@': 292 eat(); 293 return Token.AT; // what about frac, etc.? 294 295 case '0': 296 { 297 readFrom(); 298 eat(); 299 switch (ch) { 300 case 'x': 301 case 'X': 302 eat(); 303 return hexLiteral(); 304 case 'b': 305 case 'B': 306 eat(); 307 return boolLiteral(); 308 case '0': 309 case '1': 310 case '2': 311 case '3': 312 case '4': 313 case '5': 314 case '6': 315 case '7': 316 case '_': 317 return octalLiteral(); 318 case '.': 319 eat(); 320 return floatLiteral(); 321 case 'f': 322 case 'F': 323 eat(); 324 return Token.FLOAT_LITERAL; 325 case 'd': 326 case 'D': 327 eat(); 328 return Token.DOUBLE_LITERAL; 329 case 'l': 330 case 'L': 331 eat(); 332 return Token.LONG_LITERAL; 333 default: 334 return Token.INT_LITERAL; 335 } 336 } 337 case '1': 338 case '2': 339 case '3': 340 case '4': 341 case '5': 342 case '6': 343 case '7': 344 case '8': 345 case '9': 346 readFrom(); 347 return decimalLiteral(); 348 case '.': 349 { 350 readFrom(); 351 eat(); 352 switch (ch) { 353 case '.': 354 { 355 eat(); 356 if (ch == '.') { 357 eat(); 358 return Token.ELLIPSIS; 359 } else { 360 throw inputError(); 361 } 362 } 363 case '0': 364 case '1': 365 case '2': 366 case '3': 367 case '4': 368 case '5': 369 case '6': 370 case '7': 371 case '8': 372 case '9': 373 return floatLiteral(); 374 default: 375 return Token.DOT; 376 } 377 } 378 379 case '\'': 380 { 381 eat(); 382 char value; 383 switch (ch) { 384 case '\\': 385 eat(); 386 value = escape(); 387 break; 388 case '\'': 389 throw error(ErrorKind.EMPTY_CHARACTER_LITERAL); 390 default: 391 value = (char) ch; 392 eat(); 393 } 394 if (ch == '\'') { 395 saveValue(String.valueOf(value)); 396 eat(); 397 return Token.CHAR_LITERAL; 398 } 399 throw error(ErrorKind.UNTERMINATED_CHARACTER_LITERAL); 400 } 401 402 case '"': 403 { 404 eat(); 405 if (ch == '"') { 406 eat(); 407 if (ch != '"') { 408 saveValue(""); 409 return Token.STRING_LITERAL; 410 } 411 eat(); 412 return textBlock(); 413 } 414 readFrom(); 415 StringBuilder sb = new StringBuilder(); 416 STRING: 417 while (true) { 418 switch (ch) { 419 case '\\': 420 eat(); 421 sb.append(escape()); 422 continue STRING; 423 case '"': 424 saveValue(sb.toString()); 425 eat(); 426 return Token.STRING_LITERAL; 427 case '\n': 428 throw error(ErrorKind.UNTERMINATED_STRING); 429 case ASCII_SUB: 430 if (reader.done()) { 431 return Token.EOF; 432 } 433 // falls through 434 default: 435 sb.appendCodePoint(ch); 436 eat(); 437 continue STRING; 438 } 439 } 440 } 441 default: 442 if (Character.isJavaIdentifierStart(ch)) { 443 // TODO(cushon): the style guide disallows non-ascii identifiers 444 return identifier(); 445 } 446 throw inputError(); 447 } 448 } 449 } 450 textBlock()451 private Token textBlock() { 452 OUTER: 453 while (true) { 454 switch (ch) { 455 case ' ': 456 case '\r': 457 case '\t': 458 eat(); 459 break; 460 default: 461 break OUTER; 462 } 463 } 464 switch (ch) { 465 case '\r': 466 eat(); 467 if (ch == '\n') { 468 eat(); 469 } 470 break; 471 case '\n': 472 eat(); 473 break; 474 default: 475 throw inputError(); 476 } 477 readFrom(); 478 StringBuilder sb = new StringBuilder(); 479 while (true) { 480 switch (ch) { 481 case '"': 482 eat(); 483 if (ch != '"') { 484 sb.append("\""); 485 continue; 486 } 487 eat(); 488 if (ch != '"') { 489 sb.append("\"\""); 490 continue; 491 } 492 eat(); 493 String value = sb.toString(); 494 value = stripIndent(value); 495 value = translateEscapes(value); 496 saveValue(value); 497 return Token.STRING_LITERAL; 498 case ASCII_SUB: 499 if (reader.done()) { 500 return Token.EOF; 501 } 502 // falls through 503 default: 504 sb.appendCodePoint(ch); 505 eat(); 506 continue; 507 } 508 } 509 } 510 stripIndent(String value)511 static String stripIndent(String value) { 512 if (value.isEmpty()) { 513 return value; 514 } 515 ImmutableList<String> lines = value.lines().collect(toImmutableList()); 516 // the amount of whitespace to strip from the beginning of every line 517 int strip = Integer.MAX_VALUE; 518 char last = value.charAt(value.length() - 1); 519 boolean trailingNewline = last == '\n' || last == '\r'; 520 if (trailingNewline) { 521 // If the input contains a trailing newline, we have something like: 522 // 523 // |String s = """ 524 // | foo 525 // |"""; 526 // 527 // Because the final """ is unindented, nothing should be stripped. 528 strip = 0; 529 } else { 530 // find the longest common prefix of whitespace across all non-blank lines 531 for (int i = 0; i < lines.size(); i++) { 532 String line = lines.get(i); 533 int nonWhitespaceStart = nonWhitespaceStart(line); 534 if (nonWhitespaceStart == line.length()) { 535 continue; 536 } 537 strip = min(strip, nonWhitespaceStart); 538 } 539 } 540 StringBuilder result = new StringBuilder(); 541 boolean first = true; 542 for (String line : lines) { 543 if (!first) { 544 result.append('\n'); 545 } 546 int end = trailingWhitespaceStart(line); 547 if (strip <= end) { 548 result.append(line, strip, end); 549 } 550 first = false; 551 } 552 if (trailingNewline) { 553 result.append('\n'); 554 } 555 return result.toString(); 556 } 557 nonWhitespaceStart(String value)558 private static int nonWhitespaceStart(String value) { 559 int i = 0; 560 while (i < value.length() && Character.isWhitespace(value.charAt(i))) { 561 i++; 562 } 563 return i; 564 } 565 trailingWhitespaceStart(String value)566 private static int trailingWhitespaceStart(String value) { 567 int i = value.length() - 1; 568 while (i >= 0 && Character.isWhitespace(value.charAt(i))) { 569 i--; 570 } 571 return i + 1; 572 } 573 translateEscapes(String value)574 private static String translateEscapes(String value) { 575 StreamLexer lexer = 576 new StreamLexer(new UnicodeEscapePreprocessor(new SourceFile(null, value + ASCII_SUB))); 577 return lexer.translateEscapes(); 578 } 579 translateEscapes()580 private String translateEscapes() { 581 readFrom(); 582 StringBuilder sb = new StringBuilder(); 583 OUTER: 584 while (true) { 585 switch (ch) { 586 case '\\': 587 eat(); 588 sb.append(escape()); 589 continue; 590 case ASCII_SUB: 591 break OUTER; 592 default: 593 sb.appendCodePoint(ch); 594 eat(); 595 continue; 596 } 597 } 598 return sb.toString(); 599 } 600 escape()601 private char escape() { 602 boolean zeroToThree = false; 603 switch (ch) { 604 case 'b': 605 eat(); 606 return '\b'; 607 case 't': 608 eat(); 609 return '\t'; 610 case 'n': 611 eat(); 612 return '\n'; 613 case 'f': 614 eat(); 615 return '\f'; 616 case 'r': 617 eat(); 618 return '\r'; 619 case '"': 620 eat(); 621 return '\"'; 622 case '\'': 623 eat(); 624 return '\''; 625 case '\\': 626 eat(); 627 return '\\'; 628 case '0': 629 case '1': 630 case '2': 631 case '3': 632 zeroToThree = true; 633 // falls through 634 case '4': 635 case '5': 636 case '6': 637 case '7': 638 { 639 char value = (char) (ch - '0'); 640 eat(); 641 switch (ch) { 642 case '0': 643 case '1': 644 case '2': 645 case '3': 646 case '4': 647 case '5': 648 case '6': 649 case '7': 650 { 651 value = (char) ((value << 3) | (ch - '0')); 652 eat(); 653 if (zeroToThree) { 654 switch (ch) { 655 case '0': 656 case '1': 657 case '2': 658 case '3': 659 case '4': 660 case '5': 661 case '6': 662 case '7': 663 value = (char) ((value << 3) | (ch - '0')); 664 eat(); 665 return value; 666 default: 667 return value; 668 } 669 } 670 } 671 // fall through 672 default: 673 return value; 674 } 675 } 676 default: 677 throw inputError(); 678 } 679 } 680 decimalLiteral()681 private Token decimalLiteral() { 682 readDigits(); 683 switch (ch) { 684 case 'e': 685 case 'E': 686 return floatLiteral(); 687 case '.': 688 eat(); 689 return floatLiteral(); 690 case 'f': 691 case 'F': 692 eat(); 693 return Token.FLOAT_LITERAL; 694 case 'd': 695 case 'D': 696 eat(); 697 return Token.DOUBLE_LITERAL; 698 case 'l': 699 case 'L': 700 eat(); 701 return Token.LONG_LITERAL; 702 default: 703 return Token.INT_LITERAL; 704 } 705 } 706 hexFloatLiteral()707 private Token hexFloatLiteral() { 708 readHexDigits(); 709 switch (ch) { 710 case 'p': 711 case 'P': 712 eat(); 713 signedInteger(); 714 break; 715 default: // fall out 716 } 717 return floatTypeSuffix(); 718 } 719 floatLiteral()720 private Token floatLiteral() { 721 if ('0' <= ch && ch <= '9') { 722 readDigits(); 723 } 724 switch (ch) { 725 case 'e': 726 case 'E': 727 eat(); 728 signedInteger(); 729 break; 730 default: // fall out 731 } 732 return floatTypeSuffix(); 733 } 734 floatTypeSuffix()735 private Token floatTypeSuffix() { 736 switch (ch) { 737 case 'd': 738 case 'D': 739 eat(); 740 return Token.DOUBLE_LITERAL; 741 case 'f': 742 case 'F': 743 eat(); 744 return Token.FLOAT_LITERAL; 745 default: 746 return Token.DOUBLE_LITERAL; 747 } 748 } 749 signedInteger()750 private void signedInteger() { 751 switch (ch) { 752 case '-': 753 case '+': 754 eat(); 755 break; 756 default: 757 break; 758 } 759 readDigits(); 760 } 761 readHexDigits()762 private void readHexDigits() { 763 switch (ch) { 764 case 'A': 765 case 'B': 766 case 'C': 767 case 'D': 768 case 'E': 769 case 'F': 770 case 'a': 771 case 'b': 772 case 'c': 773 case 'd': 774 case 'e': 775 case 'f': 776 case '0': 777 case '1': 778 case '2': 779 case '3': 780 case '4': 781 case '5': 782 case '6': 783 case '7': 784 case '8': 785 case '9': 786 eat(); 787 break; 788 default: 789 throw inputError(); 790 } 791 OUTER: 792 while (true) { 793 switch (ch) { 794 case '_': 795 { 796 do { 797 eat(); 798 } while (ch == '_'); 799 switch (ch) { 800 case 'A': 801 case 'B': 802 case 'C': 803 case 'D': 804 case 'E': 805 case 'F': 806 case 'a': 807 case 'b': 808 case 'c': 809 case 'd': 810 case 'e': 811 case 'f': 812 case '0': 813 case '1': 814 case '2': 815 case '3': 816 case '4': 817 case '5': 818 case '6': 819 case '7': 820 case '8': 821 case '9': 822 continue OUTER; 823 default: 824 throw inputError(); 825 } 826 } 827 case 'A': 828 case 'B': 829 case 'C': 830 case 'D': 831 case 'E': 832 case 'F': 833 case 'a': 834 case 'b': 835 case 'c': 836 case 'd': 837 case 'e': 838 case 'f': 839 case '0': 840 case '1': 841 case '2': 842 case '3': 843 case '4': 844 case '5': 845 case '6': 846 case '7': 847 case '8': 848 case '9': 849 eat(); 850 break; 851 default: 852 return; 853 } 854 } 855 } 856 readDigits()857 private void readDigits() { 858 if ('0' <= ch && ch <= '9') { 859 eat(); 860 } else { 861 throw inputError(); 862 } 863 OUTER: 864 while (true) { 865 switch (ch) { 866 case '_': 867 do { 868 eat(); 869 } while (ch == '_'); 870 if ('0' <= ch && ch <= '9') { 871 continue OUTER; 872 } else { 873 throw inputError(); 874 } 875 case '0': 876 case '1': 877 case '2': 878 case '3': 879 case '4': 880 case '5': 881 case '6': 882 case '7': 883 case '8': 884 case '9': 885 eat(); 886 continue OUTER; 887 default: 888 return; 889 } 890 } 891 } 892 boolLiteral()893 private Token boolLiteral() { 894 readBinaryDigits(); 895 switch (ch) { 896 case 'l': 897 case 'L': 898 eat(); 899 return Token.LONG_LITERAL; 900 default: 901 return Token.INT_LITERAL; 902 } 903 } 904 readBinaryDigits()905 private void readBinaryDigits() { 906 switch (ch) { 907 case '0': 908 case '1': 909 eat(); 910 break; 911 default: 912 throw inputError(); 913 } 914 OUTER: 915 while (true) { 916 switch (ch) { 917 case '_': 918 do { 919 eat(); 920 } while (ch == '_'); 921 switch (ch) { 922 case '0': 923 case '1': 924 continue OUTER; 925 default: 926 throw inputError(); 927 } 928 case '0': 929 case '1': 930 eat(); 931 continue OUTER; 932 default: 933 return; 934 } 935 } 936 } 937 octalLiteral()938 private Token octalLiteral() { 939 readOctalDigits(); 940 switch (ch) { 941 case 'l': 942 case 'L': 943 eat(); 944 return Token.LONG_LITERAL; 945 default: 946 return Token.INT_LITERAL; 947 } 948 } 949 readOctalDigits()950 private void readOctalDigits() { 951 switch (ch) { 952 case '0': 953 case '1': 954 case '2': 955 case '3': 956 case '4': 957 case '5': 958 case '6': 959 case '7': 960 case '_': 961 eat(); 962 break; 963 default: 964 throw inputError(); 965 } 966 OUTER: 967 while (true) { 968 switch (ch) { 969 case '_': 970 do { 971 eat(); 972 } while (ch == '_'); 973 switch (ch) { 974 case '0': 975 case '1': 976 case '2': 977 case '3': 978 case '4': 979 case '5': 980 case '6': 981 case '7': 982 continue OUTER; 983 default: 984 throw inputError(); 985 } 986 case '0': 987 case '1': 988 case '2': 989 case '3': 990 case '4': 991 case '5': 992 case '6': 993 case '7': 994 eat(); 995 continue OUTER; 996 default: 997 return; 998 } 999 } 1000 } 1001 hexLiteral()1002 private Token hexLiteral() { 1003 readHexDigits(); 1004 switch (ch) { 1005 case '.': 1006 eat(); 1007 return hexFloatLiteral(); 1008 case 'l': 1009 case 'L': 1010 eat(); 1011 return Token.LONG_LITERAL; 1012 case 'p': 1013 case 'P': 1014 eat(); 1015 signedInteger(); 1016 return floatTypeSuffix(); 1017 default: 1018 return Token.INT_LITERAL; 1019 } 1020 } 1021 operator()1022 private Token operator() { 1023 switch (ch) { 1024 case '=': 1025 eat(); 1026 if (ch == '=') { 1027 eat(); 1028 return Token.EQ; 1029 } else { 1030 return Token.ASSIGN; 1031 } 1032 case '>': 1033 eat(); 1034 switch (ch) { 1035 case '=': 1036 eat(); 1037 return Token.GTE; 1038 case '>': 1039 eat(); 1040 switch (ch) { 1041 case '>': 1042 eat(); 1043 if (ch == '=') { 1044 eat(); 1045 return Token.GTGTGTE; 1046 } else { 1047 return Token.GTGTGT; 1048 } 1049 case '=': 1050 eat(); 1051 return Token.GTGTE; 1052 default: 1053 return Token.GTGT; 1054 } 1055 default: 1056 return Token.GT; 1057 } 1058 case '<': 1059 eat(); 1060 switch (ch) { 1061 case '=': 1062 eat(); 1063 return Token.LTE; 1064 case '<': 1065 eat(); 1066 if (ch == '=') { 1067 eat(); 1068 return Token.LTLTE; 1069 } else { 1070 return Token.LTLT; 1071 } 1072 default: 1073 return Token.LT; 1074 } 1075 case '!': 1076 eat(); 1077 if (ch == '=') { 1078 eat(); 1079 return Token.NOTEQ; 1080 } else { 1081 return Token.NOT; 1082 } 1083 case '~': 1084 eat(); 1085 return Token.TILDE; 1086 case '?': 1087 eat(); 1088 return Token.COND; 1089 case ':': 1090 eat(); 1091 if (ch == ':') { 1092 eat(); 1093 return Token.COLONCOLON; 1094 } else { 1095 return Token.COLON; 1096 } 1097 case '-': 1098 eat(); 1099 switch (ch) { 1100 case '>': 1101 eat(); 1102 return Token.ARROW; 1103 case '-': 1104 eat(); 1105 return Token.DECR; 1106 case '=': 1107 eat(); 1108 return Token.MINUSEQ; 1109 default: 1110 return Token.MINUS; 1111 } 1112 case '&': 1113 eat(); 1114 switch (ch) { 1115 case '&': 1116 eat(); 1117 return Token.ANDAND; 1118 case '=': 1119 eat(); 1120 return Token.ANDEQ; 1121 default: 1122 return Token.AND; 1123 } 1124 case '|': 1125 eat(); 1126 switch (ch) { 1127 case '=': 1128 eat(); 1129 return Token.OREQ; 1130 case '|': 1131 eat(); 1132 return Token.OROR; 1133 default: 1134 return Token.OR; 1135 } 1136 case '+': 1137 eat(); 1138 switch (ch) { 1139 case '+': 1140 eat(); 1141 return Token.INCR; 1142 case '=': 1143 eat(); 1144 return Token.PLUSEQ; 1145 default: 1146 return Token.PLUS; 1147 } 1148 case '*': 1149 eat(); 1150 if (ch == '=') { 1151 eat(); 1152 return Token.MULTEQ; 1153 } else { 1154 return Token.MULT; 1155 } 1156 case '/': 1157 // handled with comments 1158 throw inputError(); 1159 1160 case '%': 1161 eat(); 1162 if (ch == '=') { 1163 eat(); 1164 return Token.MODEQ; 1165 } else { 1166 return Token.MOD; 1167 } 1168 case '^': 1169 eat(); 1170 if (ch == '=') { 1171 eat(); 1172 return Token.XOREQ; 1173 } else { 1174 return Token.XOR; 1175 } 1176 default: 1177 throw inputError(); 1178 } 1179 } 1180 identifier()1181 private Token identifier() { 1182 readFrom(); 1183 eat(); 1184 // TODO(cushon): the style guide disallows non-ascii identifiers 1185 while (Character.isJavaIdentifierPart(ch)) { 1186 if (ch == ASCII_SUB && reader.done()) { 1187 break; 1188 } 1189 eat(); 1190 } 1191 return makeIdent(stringValue()); 1192 } 1193 makeIdent(String s)1194 private static Token makeIdent(String s) { 1195 switch (s) { 1196 case "abstract": 1197 return Token.ABSTRACT; 1198 case "assert": 1199 return Token.ASSERT; 1200 case "boolean": 1201 return Token.BOOLEAN; 1202 case "break": 1203 return Token.BREAK; 1204 case "byte": 1205 return Token.BYTE; 1206 case "case": 1207 return Token.CASE; 1208 case "catch": 1209 return Token.CATCH; 1210 case "char": 1211 return Token.CHAR; 1212 case "class": 1213 return Token.CLASS; 1214 case "const": 1215 return Token.CONST; 1216 case "continue": 1217 return Token.CONTINUE; 1218 case "default": 1219 return Token.DEFAULT; 1220 case "do": 1221 return Token.DO; 1222 case "double": 1223 return Token.DOUBLE; 1224 case "else": 1225 return Token.ELSE; 1226 case "enum": 1227 return Token.ENUM; 1228 case "extends": 1229 return Token.EXTENDS; 1230 case "final": 1231 return Token.FINAL; 1232 case "finally": 1233 return Token.FINALLY; 1234 case "float": 1235 return Token.FLOAT; 1236 case "for": 1237 return Token.FOR; 1238 case "goto": 1239 return Token.GOTO; 1240 case "if": 1241 return Token.IF; 1242 case "implements": 1243 return Token.IMPLEMENTS; 1244 case "import": 1245 return Token.IMPORT; 1246 case "instanceof": 1247 return Token.INSTANCEOF; 1248 case "int": 1249 return Token.INT; 1250 case "interface": 1251 return Token.INTERFACE; 1252 case "long": 1253 return Token.LONG; 1254 case "native": 1255 return Token.NATIVE; 1256 case "new": 1257 return Token.NEW; 1258 case "package": 1259 return Token.PACKAGE; 1260 case "private": 1261 return Token.PRIVATE; 1262 case "protected": 1263 return Token.PROTECTED; 1264 case "public": 1265 return Token.PUBLIC; 1266 case "return": 1267 return Token.RETURN; 1268 case "short": 1269 return Token.SHORT; 1270 case "static": 1271 return Token.STATIC; 1272 case "strictfp": 1273 return Token.STRICTFP; 1274 case "super": 1275 return Token.SUPER; 1276 case "switch": 1277 return Token.SWITCH; 1278 case "synchronized": 1279 return Token.SYNCHRONIZED; 1280 case "this": 1281 return Token.THIS; 1282 case "throw": 1283 return Token.THROW; 1284 case "throws": 1285 return Token.THROWS; 1286 case "transient": 1287 return Token.TRANSIENT; 1288 case "try": 1289 return Token.TRY; 1290 case "void": 1291 return Token.VOID; 1292 case "volatile": 1293 return Token.VOLATILE; 1294 case "while": 1295 return Token.WHILE; 1296 case "true": 1297 return Token.TRUE; 1298 case "false": 1299 return Token.FALSE; 1300 case "null": 1301 return Token.NULL; 1302 default: 1303 return Token.IDENT; 1304 } 1305 } 1306 inputError()1307 private TurbineError inputError() { 1308 return error( 1309 ErrorKind.UNEXPECTED_INPUT, 1310 Character.isBmpCodePoint(ch) ? Character.toString((char) ch) : String.format("U+%X", ch)); 1311 } 1312 error(ErrorKind kind, Object... args)1313 private TurbineError error(ErrorKind kind, Object... args) { 1314 return TurbineError.format(reader.source(), reader.position(), kind, args); 1315 } 1316 } 1317