1 /* 2 * Copyright 2015 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.googlejavaformat.java; 16 17 import static com.google.common.base.Preconditions.checkNotNull; 18 import static com.google.common.collect.Iterables.getLast; 19 import static java.nio.charset.StandardCharsets.UTF_8; 20 21 import com.google.common.base.MoreObjects; 22 import com.google.common.base.Verify; 23 import com.google.common.collect.DiscreteDomain; 24 import com.google.common.collect.ImmutableCollection; 25 import com.google.common.collect.ImmutableList; 26 import com.google.common.collect.ImmutableMap; 27 import com.google.common.collect.ImmutableRangeMap; 28 import com.google.common.collect.ImmutableSet; 29 import com.google.common.collect.Iterators; 30 import com.google.common.collect.Range; 31 import com.google.common.collect.RangeSet; 32 import com.google.common.collect.TreeRangeSet; 33 import com.google.googlejavaformat.Input; 34 import com.google.googlejavaformat.Newlines; 35 import com.google.googlejavaformat.java.JavacTokens.RawTok; 36 import com.sun.tools.javac.file.JavacFileManager; 37 import com.sun.tools.javac.parser.Tokens.TokenKind; 38 import com.sun.tools.javac.tree.JCTree.JCCompilationUnit; 39 import com.sun.tools.javac.util.Context; 40 import com.sun.tools.javac.util.Log; 41 import com.sun.tools.javac.util.Log.DeferredDiagnosticHandler; 42 import com.sun.tools.javac.util.Options; 43 import java.io.IOException; 44 import java.net.URI; 45 import java.util.ArrayList; 46 import java.util.Collection; 47 import java.util.Iterator; 48 import java.util.List; 49 import javax.tools.Diagnostic; 50 import javax.tools.DiagnosticCollector; 51 import javax.tools.DiagnosticListener; 52 import javax.tools.JavaFileObject; 53 import javax.tools.JavaFileObject.Kind; 54 import javax.tools.SimpleJavaFileObject; 55 56 /** {@code JavaInput} extends {@link Input} to represent a Java input document. */ 57 public final class JavaInput extends Input { 58 /** 59 * A {@code JavaInput} is a sequence of {@link Tok}s that cover the Java input. A {@link Tok} is 60 * either a token (if {@code isToken()}), or a non-token, which is a comment (if {@code 61 * isComment()}) or a newline (if {@code isNewline()}) or a maximal sequence of other whitespace 62 * characters (if {@code isSpaces()}). Each {@link Tok} contains a sequence of characters, an 63 * index (sequential starting at {@code 0} for tokens and comments, else {@code -1}), and a 64 * ({@code 0}-origin) position in the input. The concatenation of the texts of all the {@link 65 * Tok}s equals the input. Each Input ends with a token EOF {@link Tok}, with empty text. 66 * 67 * <p>A {@code /*} comment possibly contains newlines; a {@code //} comment does not contain the 68 * terminating newline character, but is followed by a newline {@link Tok}. 69 */ 70 static final class Tok implements Input.Tok { 71 private final int index; 72 private final String originalText; 73 private final String text; 74 private final int position; 75 private final int columnI; 76 private final boolean isToken; 77 private final TokenKind kind; 78 79 /** 80 * The {@code Tok} constructor. 81 * 82 * @param index its index 83 * @param originalText its original text, before removing Unicode escapes 84 * @param text its text after removing Unicode escapes 85 * @param position its {@code 0}-origin position in the input 86 * @param columnI its {@code 0}-origin column number in the input 87 * @param isToken whether the {@code Tok} is a token 88 * @param kind the token kind 89 */ Tok( int index, String originalText, String text, int position, int columnI, boolean isToken, TokenKind kind)90 Tok( 91 int index, 92 String originalText, 93 String text, 94 int position, 95 int columnI, 96 boolean isToken, 97 TokenKind kind) { 98 this.index = index; 99 this.originalText = originalText; 100 this.text = text; 101 this.position = position; 102 this.columnI = columnI; 103 this.isToken = isToken; 104 this.kind = kind; 105 } 106 107 @Override getIndex()108 public int getIndex() { 109 return index; 110 } 111 112 @Override getText()113 public String getText() { 114 return text; 115 } 116 117 @Override getOriginalText()118 public String getOriginalText() { 119 return originalText; 120 } 121 122 @Override length()123 public int length() { 124 return originalText.length(); 125 } 126 127 @Override getPosition()128 public int getPosition() { 129 return position; 130 } 131 132 @Override getColumn()133 public int getColumn() { 134 return columnI; 135 } 136 isToken()137 boolean isToken() { 138 return isToken; 139 } 140 141 @Override isNewline()142 public boolean isNewline() { 143 return Newlines.isNewline(text); 144 } 145 146 @Override isSlashSlashComment()147 public boolean isSlashSlashComment() { 148 return text.startsWith("//"); 149 } 150 151 @Override isSlashStarComment()152 public boolean isSlashStarComment() { 153 return text.startsWith("/*"); 154 } 155 156 @Override isJavadocComment()157 public boolean isJavadocComment() { 158 // comments like `/***` are also javadoc, but their formatting probably won't be improved 159 // by the javadoc formatter 160 return text.startsWith("/**") && text.charAt("/**".length()) != '*' && text.length() > 4; 161 } 162 163 @Override isComment()164 public boolean isComment() { 165 return isSlashSlashComment() || isSlashStarComment(); 166 } 167 168 @Override toString()169 public String toString() { 170 return MoreObjects.toStringHelper(this) 171 .add("index", index) 172 .add("text", text) 173 .add("position", position) 174 .add("columnI", columnI) 175 .add("isToken", isToken) 176 .toString(); 177 } 178 kind()179 public TokenKind kind() { 180 return kind; 181 } 182 } 183 184 /** 185 * A {@link Token} contains a token {@link Tok} and its associated non-tokens; each non-token 186 * {@link Tok} belongs to one {@link Token}. Each {@link Token} has an immutable list of its 187 * non-tokens that appear before it, and another list of its non-tokens that appear after it. The 188 * concatenation of the texts of all the {@link Token}s' {@link Tok}s, each preceded by the texts 189 * of its {@code toksBefore} and followed by the texts of its {@code toksAfter}, equals the input. 190 */ 191 static final class Token implements Input.Token { 192 private final Tok tok; 193 private final ImmutableList<Tok> toksBefore; 194 private final ImmutableList<Tok> toksAfter; 195 196 /** 197 * Token constructor. 198 * 199 * @param toksBefore the earlier non-token {link Tok}s assigned to this {@code Token} 200 * @param tok this token {@link Tok} 201 * @param toksAfter the later non-token {link Tok}s assigned to this {@code Token} 202 */ Token(List<Tok> toksBefore, Tok tok, List<Tok> toksAfter)203 Token(List<Tok> toksBefore, Tok tok, List<Tok> toksAfter) { 204 this.toksBefore = ImmutableList.copyOf(toksBefore); 205 this.tok = tok; 206 this.toksAfter = ImmutableList.copyOf(toksAfter); 207 } 208 209 /** 210 * Get the token's {@link Tok}. 211 * 212 * @return the token's {@link Tok} 213 */ 214 @Override getTok()215 public Tok getTok() { 216 return tok; 217 } 218 219 /** 220 * Get the earlier {@link Tok}s assigned to this {@code Token}. 221 * 222 * @return the earlier {@link Tok}s assigned to this {@code Token} 223 */ 224 @Override getToksBefore()225 public ImmutableList<? extends Input.Tok> getToksBefore() { 226 return toksBefore; 227 } 228 229 /** 230 * Get the later {@link Tok}s assigned to this {@code Token}. 231 * 232 * @return the later {@link Tok}s assigned to this {@code Token} 233 */ 234 @Override getToksAfter()235 public ImmutableList<? extends Input.Tok> getToksAfter() { 236 return toksAfter; 237 } 238 239 @Override toString()240 public String toString() { 241 return MoreObjects.toStringHelper(this) 242 .add("tok", tok) 243 .add("toksBefore", toksBefore) 244 .add("toksAfter", toksAfter) 245 .toString(); 246 } 247 } 248 249 private final String text; // The input. 250 private int kN; // The number of numbered toks (tokens or comments), excluding the EOF. 251 252 /* 253 * The following lists record the sequential indices of the {@code Tok}s on each input line. (Only 254 * tokens and comments have sequential indices.) Tokens and {@code //} comments lie on just one 255 * line; {@code /*} comments can lie on multiple lines. These data structures (along with 256 * equivalent ones for the formatted output) let us compute correspondences between the input and 257 * output. 258 */ 259 260 private final ImmutableMap<Integer, Integer> positionToColumnMap; // Map Tok position to column. 261 private final ImmutableList<Token> tokens; // The Tokens for this input. 262 private final ImmutableRangeMap<Integer, Token> positionTokenMap; // Map position to Token. 263 264 /** Map from Tok index to the associated Token. */ 265 private final Token[] kToToken; 266 267 /** 268 * Input constructor. 269 * 270 * @param text the input text 271 * @throws FormatterException if the input cannot be parsed 272 */ JavaInput(String text)273 public JavaInput(String text) throws FormatterException { 274 this.text = checkNotNull(text); 275 setLines(ImmutableList.copyOf(Newlines.lineIterator(text))); 276 ImmutableList<Tok> toks = buildToks(text); 277 positionToColumnMap = makePositionToColumnMap(toks); 278 tokens = buildTokens(toks); 279 ImmutableRangeMap.Builder<Integer, Token> tokenLocations = ImmutableRangeMap.builder(); 280 for (Token token : tokens) { 281 Input.Tok end = JavaOutput.endTok(token); 282 int upper = end.getPosition(); 283 if (!end.getText().isEmpty()) { 284 upper += end.length() - 1; 285 } 286 tokenLocations.put(Range.closed(JavaOutput.startTok(token).getPosition(), upper), token); 287 } 288 positionTokenMap = tokenLocations.build(); 289 290 // adjust kN for EOF 291 kToToken = new Token[kN + 1]; 292 for (Token token : tokens) { 293 for (Input.Tok tok : token.getToksBefore()) { 294 if (tok.getIndex() < 0) { 295 continue; 296 } 297 kToToken[tok.getIndex()] = token; 298 } 299 kToToken[token.getTok().getIndex()] = token; 300 for (Input.Tok tok : token.getToksAfter()) { 301 if (tok.getIndex() < 0) { 302 continue; 303 } 304 kToToken[tok.getIndex()] = token; 305 } 306 } 307 } 308 makePositionToColumnMap(List<Tok> toks)309 private static ImmutableMap<Integer, Integer> makePositionToColumnMap(List<Tok> toks) { 310 ImmutableMap.Builder<Integer, Integer> builder = ImmutableMap.builder(); 311 for (Tok tok : toks) { 312 builder.put(tok.getPosition(), tok.getColumn()); 313 } 314 return builder.build(); 315 } 316 317 /** 318 * Get the input text. 319 * 320 * @return the input text 321 */ 322 @Override getText()323 public String getText() { 324 return text; 325 } 326 327 @Override getPositionToColumnMap()328 public ImmutableMap<Integer, Integer> getPositionToColumnMap() { 329 return positionToColumnMap; 330 } 331 332 /** Lex the input and build the list of toks. */ buildToks(String text)333 private ImmutableList<Tok> buildToks(String text) throws FormatterException { 334 ImmutableList<Tok> toks = buildToks(text, ImmutableSet.of()); 335 kN = getLast(toks).getIndex(); 336 computeRanges(toks); 337 return toks; 338 } 339 340 /** 341 * Lex the input and build the list of toks. 342 * 343 * @param text the text to be lexed. 344 * @param stopTokens a set of tokens which should cause lexing to stop. If one of these is found, 345 * the returned list will include tokens up to but not including that token. 346 */ buildToks(String text, ImmutableSet<TokenKind> stopTokens)347 static ImmutableList<Tok> buildToks(String text, ImmutableSet<TokenKind> stopTokens) 348 throws FormatterException { 349 stopTokens = ImmutableSet.<TokenKind>builder().addAll(stopTokens).add(TokenKind.EOF).build(); 350 Context context = new Context(); 351 Options.instance(context).put("--enable-preview", "true"); 352 new JavacFileManager(context, true, UTF_8); 353 DiagnosticCollector<JavaFileObject> diagnosticCollector = new DiagnosticCollector<>(); 354 context.put(DiagnosticListener.class, diagnosticCollector); 355 Log log = Log.instance(context); 356 log.useSource( 357 new SimpleJavaFileObject(URI.create("Source.java"), Kind.SOURCE) { 358 @Override 359 public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException { 360 return text; 361 } 362 }); 363 DeferredDiagnosticHandler diagnostics = new DeferredDiagnosticHandler(log); 364 ImmutableList<RawTok> rawToks = JavacTokens.getTokens(text, context, stopTokens); 365 if (diagnostics.getDiagnostics().stream().anyMatch(d -> d.getKind() == Diagnostic.Kind.ERROR)) { 366 return ImmutableList.of(new Tok(0, "", "", 0, 0, true, null)); // EOF 367 } 368 int kN = 0; 369 List<Tok> toks = new ArrayList<>(); 370 int charI = 0; 371 int columnI = 0; 372 for (RawTok t : rawToks) { 373 if (stopTokens.contains(t.kind())) { 374 break; 375 } 376 int charI0 = t.pos(); 377 // Get string, possibly with Unicode escapes. 378 String originalTokText = text.substring(charI0, t.endPos()); 379 String tokText = 380 t.kind() == TokenKind.STRINGLITERAL 381 ? t.stringVal() // Unicode escapes removed. 382 : originalTokText; 383 char tokText0 = tokText.charAt(0); // The token's first character. 384 final boolean isToken; // Is this tok a token? 385 final boolean isNumbered; // Is this tok numbered? (tokens and comments) 386 String extraNewline = null; // Extra newline at end? 387 List<String> strings = new ArrayList<>(); 388 if (Character.isWhitespace(tokText0)) { 389 isToken = false; 390 isNumbered = false; 391 Iterator<String> it = Newlines.lineIterator(originalTokText); 392 while (it.hasNext()) { 393 String line = it.next(); 394 String newline = Newlines.getLineEnding(line); 395 if (newline != null) { 396 String spaces = line.substring(0, line.length() - newline.length()); 397 if (!spaces.isEmpty()) { 398 strings.add(spaces); 399 } 400 strings.add(newline); 401 } else if (!line.isEmpty()) { 402 strings.add(line); 403 } 404 } 405 } else if (tokText.startsWith("'") || tokText.startsWith("\"")) { 406 isToken = true; 407 isNumbered = true; 408 strings.add(originalTokText); 409 } else if (tokText.startsWith("//") || tokText.startsWith("/*")) { 410 // For compatibility with an earlier lexer, the newline after a // comment is its own tok. 411 if (tokText.startsWith("//") 412 && (originalTokText.endsWith("\n") || originalTokText.endsWith("\r"))) { 413 extraNewline = Newlines.getLineEnding(originalTokText); 414 tokText = tokText.substring(0, tokText.length() - extraNewline.length()); 415 originalTokText = 416 originalTokText.substring(0, originalTokText.length() - extraNewline.length()); 417 } 418 isToken = false; 419 isNumbered = true; 420 strings.add(originalTokText); 421 } else if (Character.isJavaIdentifierStart(tokText0) 422 || Character.isDigit(tokText0) 423 || (tokText0 == '.' && tokText.length() > 1 && Character.isDigit(tokText.charAt(1)))) { 424 // Identifier, keyword, or numeric literal (a dot may begin a number, as in .2D). 425 isToken = true; 426 isNumbered = true; 427 strings.add(tokText); 428 } else { 429 // Other tokens ("+" or "++" or ">>" are broken into one-character toks, because ">>" 430 // cannot be lexed without syntactic knowledge. This implementation fails if the token 431 // contains Unicode escapes. 432 isToken = true; 433 isNumbered = true; 434 for (char c : tokText.toCharArray()) { 435 strings.add(String.valueOf(c)); 436 } 437 } 438 if (strings.size() == 1) { 439 toks.add( 440 new Tok( 441 isNumbered ? kN++ : -1, 442 originalTokText, 443 tokText, 444 charI, 445 columnI, 446 isToken, 447 t.kind())); 448 charI += originalTokText.length(); 449 columnI = updateColumn(columnI, originalTokText); 450 451 } else { 452 if (strings.size() != 1 && !tokText.equals(originalTokText)) { 453 throw new FormatterException( 454 "Unicode escapes not allowed in whitespace or multi-character operators"); 455 } 456 for (String str : strings) { 457 toks.add(new Tok(isNumbered ? kN++ : -1, str, str, charI, columnI, isToken, null)); 458 charI += str.length(); 459 columnI = updateColumn(columnI, originalTokText); 460 } 461 } 462 if (extraNewline != null) { 463 toks.add(new Tok(-1, extraNewline, extraNewline, charI, columnI, false, null)); 464 columnI = 0; 465 charI += extraNewline.length(); 466 } 467 } 468 toks.add(new Tok(kN, "", "", charI, columnI, true, null)); // EOF tok. 469 return ImmutableList.copyOf(toks); 470 } 471 updateColumn(int columnI, String originalTokText)472 private static int updateColumn(int columnI, String originalTokText) { 473 Integer last = Iterators.getLast(Newlines.lineOffsetIterator(originalTokText)); 474 if (last > 0) { 475 columnI = originalTokText.length() - last; 476 } else { 477 columnI += originalTokText.length(); 478 } 479 return columnI; 480 } 481 buildTokens(List<Tok> toks)482 private static ImmutableList<Token> buildTokens(List<Tok> toks) { 483 ImmutableList.Builder<Token> tokens = ImmutableList.builder(); 484 int k = 0; 485 int kN = toks.size(); 486 487 // Remaining non-tokens before the token go here. 488 ImmutableList.Builder<Tok> toksBefore = ImmutableList.builder(); 489 490 OUTERMOST: 491 while (k < kN) { 492 while (!toks.get(k).isToken()) { 493 Tok tok = toks.get(k++); 494 toksBefore.add(tok); 495 if (isParamComment(tok)) { 496 while (toks.get(k).isNewline()) { 497 // drop newlines after parameter comments 498 k++; 499 } 500 } 501 } 502 Tok tok = toks.get(k++); 503 504 // Non-tokens starting on the same line go here too. 505 ImmutableList.Builder<Tok> toksAfter = ImmutableList.builder(); 506 OUTER: 507 while (k < kN && !toks.get(k).isToken()) { 508 // Don't attach inline comments to certain leading tokens, e.g. for `f(/*flag1=*/true). 509 // 510 // Attaching inline comments to the right token is hard, and this barely 511 // scratches the surface. But it's enough to do a better job with parameter 512 // name comments. 513 // 514 // TODO(cushon): find a better strategy. 515 if (toks.get(k).isSlashStarComment()) { 516 switch (tok.getText()) { 517 case "(": 518 case "<": 519 case ".": 520 break OUTER; 521 default: 522 break; 523 } 524 } 525 if (toks.get(k).isJavadocComment()) { 526 switch (tok.getText()) { 527 case ";": 528 break OUTER; 529 default: 530 break; 531 } 532 } 533 if (isParamComment(toks.get(k))) { 534 tokens.add(new Token(toksBefore.build(), tok, toksAfter.build())); 535 toksBefore = ImmutableList.<Tok>builder().add(toks.get(k++)); 536 // drop newlines after parameter comments 537 while (toks.get(k).isNewline()) { 538 k++; 539 } 540 continue OUTERMOST; 541 } 542 Tok nonTokenAfter = toks.get(k++); 543 toksAfter.add(nonTokenAfter); 544 if (Newlines.containsBreaks(nonTokenAfter.getText())) { 545 break; 546 } 547 } 548 tokens.add(new Token(toksBefore.build(), tok, toksAfter.build())); 549 toksBefore = ImmutableList.builder(); 550 } 551 return tokens.build(); 552 } 553 isParamComment(Tok tok)554 private static boolean isParamComment(Tok tok) { 555 return tok.isSlashStarComment() 556 && tok.getText().matches("\\/\\*[A-Za-z0-9\\s_\\-]+=\\s*\\*\\/"); 557 } 558 559 /** 560 * Convert from an offset and length flag pair to a token range. 561 * 562 * @param offset the {@code 0}-based offset in characters 563 * @param length the length in characters 564 * @return the {@code 0}-based {@link Range} of tokens 565 * @throws FormatterException if offset + length is outside the file 566 */ characterRangeToTokenRange(int offset, int length)567 Range<Integer> characterRangeToTokenRange(int offset, int length) throws FormatterException { 568 int requiredLength = offset + length; 569 if (requiredLength > text.length()) { 570 throw new FormatterException( 571 String.format( 572 "error: invalid length %d, offset + length (%d) is outside the file", 573 length, requiredLength)); 574 } 575 if (length < 0) { 576 return EMPTY_RANGE; 577 } 578 if (length == 0) { 579 // 0 stands for "format the line under the cursor" 580 length = 1; 581 } 582 ImmutableCollection<Token> enclosed = 583 getPositionTokenMap() 584 .subRangeMap(Range.closedOpen(offset, offset + length)) 585 .asMapOfRanges() 586 .values(); 587 if (enclosed.isEmpty()) { 588 return EMPTY_RANGE; 589 } 590 return Range.closedOpen( 591 enclosed.iterator().next().getTok().getIndex(), getLast(enclosed).getTok().getIndex() + 1); 592 } 593 594 /** 595 * Get the number of toks. 596 * 597 * @return the number of toks, including the EOF tok 598 */ 599 @Override getkN()600 public int getkN() { 601 return kN; 602 } 603 604 /** 605 * Get the Token by index. 606 * 607 * @param k the token index 608 */ 609 @Override getToken(int k)610 public Token getToken(int k) { 611 return kToToken[k]; 612 } 613 614 /** 615 * Get the input tokens. 616 * 617 * @return the input tokens 618 */ 619 @Override getTokens()620 public ImmutableList<? extends Input.Token> getTokens() { 621 return tokens; 622 } 623 624 /** 625 * Get the navigable map from position to {@link Token}. Used to look for tokens following a given 626 * one, and to implement the --offset and --length flags to reformat a character range in the 627 * input file. 628 * 629 * @return the navigable map from position to {@link Token} 630 */ 631 @Override getPositionTokenMap()632 public ImmutableRangeMap<Integer, Token> getPositionTokenMap() { 633 return positionTokenMap; 634 } 635 636 @Override toString()637 public String toString() { 638 return MoreObjects.toStringHelper(this) 639 .add("tokens", tokens) 640 .add("super", super.toString()) 641 .toString(); 642 } 643 644 private JCCompilationUnit unit; 645 646 @Override getLineNumber(int inputPosition)647 public int getLineNumber(int inputPosition) { 648 Verify.verifyNotNull(unit, "Expected compilation unit to be set."); 649 return unit.getLineMap().getLineNumber(inputPosition); 650 } 651 652 @Override getColumnNumber(int inputPosition)653 public int getColumnNumber(int inputPosition) { 654 Verify.verifyNotNull(unit, "Expected compilation unit to be set."); 655 return unit.getLineMap().getColumnNumber(inputPosition); 656 } 657 658 // TODO(cushon): refactor JavaInput so the CompilationUnit can be passed into 659 // the constructor. setCompilationUnit(JCCompilationUnit unit)660 public void setCompilationUnit(JCCompilationUnit unit) { 661 this.unit = unit; 662 } 663 characterRangesToTokenRanges(Collection<Range<Integer>> characterRanges)664 public RangeSet<Integer> characterRangesToTokenRanges(Collection<Range<Integer>> characterRanges) 665 throws FormatterException { 666 RangeSet<Integer> tokenRangeSet = TreeRangeSet.create(); 667 for (Range<Integer> characterRange0 : characterRanges) { 668 Range<Integer> characterRange = characterRange0.canonical(DiscreteDomain.integers()); 669 tokenRangeSet.add( 670 characterRangeToTokenRange( 671 characterRange.lowerEndpoint(), 672 characterRange.upperEndpoint() - characterRange.lowerEndpoint())); 673 } 674 return tokenRangeSet; 675 } 676 } 677