• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2015 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.googlejavaformat.java;
16 
17 import static com.google.common.base.Preconditions.checkNotNull;
18 import static com.google.common.collect.Iterables.getLast;
19 import static java.nio.charset.StandardCharsets.UTF_8;
20 
21 import com.google.common.base.MoreObjects;
22 import com.google.common.base.Verify;
23 import com.google.common.collect.DiscreteDomain;
24 import com.google.common.collect.ImmutableCollection;
25 import com.google.common.collect.ImmutableList;
26 import com.google.common.collect.ImmutableMap;
27 import com.google.common.collect.ImmutableRangeMap;
28 import com.google.common.collect.ImmutableSet;
29 import com.google.common.collect.Iterators;
30 import com.google.common.collect.Range;
31 import com.google.common.collect.RangeSet;
32 import com.google.common.collect.TreeRangeSet;
33 import com.google.googlejavaformat.Input;
34 import com.google.googlejavaformat.Newlines;
35 import com.google.googlejavaformat.java.JavacTokens.RawTok;
36 import com.sun.tools.javac.file.JavacFileManager;
37 import com.sun.tools.javac.parser.Tokens.TokenKind;
38 import com.sun.tools.javac.tree.JCTree.JCCompilationUnit;
39 import com.sun.tools.javac.util.Context;
40 import com.sun.tools.javac.util.Log;
41 import com.sun.tools.javac.util.Log.DeferredDiagnosticHandler;
42 import com.sun.tools.javac.util.Options;
43 import java.io.IOException;
44 import java.net.URI;
45 import java.util.ArrayList;
46 import java.util.Collection;
47 import java.util.Iterator;
48 import java.util.List;
49 import javax.tools.Diagnostic;
50 import javax.tools.DiagnosticCollector;
51 import javax.tools.DiagnosticListener;
52 import javax.tools.JavaFileObject;
53 import javax.tools.JavaFileObject.Kind;
54 import javax.tools.SimpleJavaFileObject;
55 
56 /** {@code JavaInput} extends {@link Input} to represent a Java input document. */
57 public final class JavaInput extends Input {
58   /**
59    * A {@code JavaInput} is a sequence of {@link Tok}s that cover the Java input. A {@link Tok} is
60    * either a token (if {@code isToken()}), or a non-token, which is a comment (if {@code
61    * isComment()}) or a newline (if {@code isNewline()}) or a maximal sequence of other whitespace
62    * characters (if {@code isSpaces()}). Each {@link Tok} contains a sequence of characters, an
63    * index (sequential starting at {@code 0} for tokens and comments, else {@code -1}), and a
64    * ({@code 0}-origin) position in the input. The concatenation of the texts of all the {@link
65    * Tok}s equals the input. Each Input ends with a token EOF {@link Tok}, with empty text.
66    *
67    * <p>A {@code /*} comment possibly contains newlines; a {@code //} comment does not contain the
68    * terminating newline character, but is followed by a newline {@link Tok}.
69    */
70   static final class Tok implements Input.Tok {
71     private final int index;
72     private final String originalText;
73     private final String text;
74     private final int position;
75     private final int columnI;
76     private final boolean isToken;
77     private final TokenKind kind;
78 
79     /**
80      * The {@code Tok} constructor.
81      *
82      * @param index its index
83      * @param originalText its original text, before removing Unicode escapes
84      * @param text its text after removing Unicode escapes
85      * @param position its {@code 0}-origin position in the input
86      * @param columnI its {@code 0}-origin column number in the input
87      * @param isToken whether the {@code Tok} is a token
88      * @param kind the token kind
89      */
Tok( int index, String originalText, String text, int position, int columnI, boolean isToken, TokenKind kind)90     Tok(
91         int index,
92         String originalText,
93         String text,
94         int position,
95         int columnI,
96         boolean isToken,
97         TokenKind kind) {
98       this.index = index;
99       this.originalText = originalText;
100       this.text = text;
101       this.position = position;
102       this.columnI = columnI;
103       this.isToken = isToken;
104       this.kind = kind;
105     }
106 
107     @Override
getIndex()108     public int getIndex() {
109       return index;
110     }
111 
112     @Override
getText()113     public String getText() {
114       return text;
115     }
116 
117     @Override
getOriginalText()118     public String getOriginalText() {
119       return originalText;
120     }
121 
122     @Override
length()123     public int length() {
124       return originalText.length();
125     }
126 
127     @Override
getPosition()128     public int getPosition() {
129       return position;
130     }
131 
132     @Override
getColumn()133     public int getColumn() {
134       return columnI;
135     }
136 
isToken()137     boolean isToken() {
138       return isToken;
139     }
140 
141     @Override
isNewline()142     public boolean isNewline() {
143       return Newlines.isNewline(text);
144     }
145 
146     @Override
isSlashSlashComment()147     public boolean isSlashSlashComment() {
148       return text.startsWith("//");
149     }
150 
151     @Override
isSlashStarComment()152     public boolean isSlashStarComment() {
153       return text.startsWith("/*");
154     }
155 
156     @Override
isJavadocComment()157     public boolean isJavadocComment() {
158       // comments like `/***` are also javadoc, but their formatting probably won't be improved
159       // by the javadoc formatter
160       return text.startsWith("/**") && text.charAt("/**".length()) != '*' && text.length() > 4;
161     }
162 
163     @Override
isComment()164     public boolean isComment() {
165       return isSlashSlashComment() || isSlashStarComment();
166     }
167 
168     @Override
toString()169     public String toString() {
170       return MoreObjects.toStringHelper(this)
171           .add("index", index)
172           .add("text", text)
173           .add("position", position)
174           .add("columnI", columnI)
175           .add("isToken", isToken)
176           .toString();
177     }
178 
kind()179     public TokenKind kind() {
180       return kind;
181     }
182   }
183 
184   /**
185    * A {@link Token} contains a token {@link Tok} and its associated non-tokens; each non-token
186    * {@link Tok} belongs to one {@link Token}. Each {@link Token} has an immutable list of its
187    * non-tokens that appear before it, and another list of its non-tokens that appear after it. The
188    * concatenation of the texts of all the {@link Token}s' {@link Tok}s, each preceded by the texts
189    * of its {@code toksBefore} and followed by the texts of its {@code toksAfter}, equals the input.
190    */
191   static final class Token implements Input.Token {
192     private final Tok tok;
193     private final ImmutableList<Tok> toksBefore;
194     private final ImmutableList<Tok> toksAfter;
195 
196     /**
197      * Token constructor.
198      *
199      * @param toksBefore the earlier non-token {link Tok}s assigned to this {@code Token}
200      * @param tok this token {@link Tok}
201      * @param toksAfter the later non-token {link Tok}s assigned to this {@code Token}
202      */
Token(List<Tok> toksBefore, Tok tok, List<Tok> toksAfter)203     Token(List<Tok> toksBefore, Tok tok, List<Tok> toksAfter) {
204       this.toksBefore = ImmutableList.copyOf(toksBefore);
205       this.tok = tok;
206       this.toksAfter = ImmutableList.copyOf(toksAfter);
207     }
208 
209     /**
210      * Get the token's {@link Tok}.
211      *
212      * @return the token's {@link Tok}
213      */
214     @Override
getTok()215     public Tok getTok() {
216       return tok;
217     }
218 
219     /**
220      * Get the earlier {@link Tok}s assigned to this {@code Token}.
221      *
222      * @return the earlier {@link Tok}s assigned to this {@code Token}
223      */
224     @Override
getToksBefore()225     public ImmutableList<? extends Input.Tok> getToksBefore() {
226       return toksBefore;
227     }
228 
229     /**
230      * Get the later {@link Tok}s assigned to this {@code Token}.
231      *
232      * @return the later {@link Tok}s assigned to this {@code Token}
233      */
234     @Override
getToksAfter()235     public ImmutableList<? extends Input.Tok> getToksAfter() {
236       return toksAfter;
237     }
238 
239     @Override
toString()240     public String toString() {
241       return MoreObjects.toStringHelper(this)
242           .add("tok", tok)
243           .add("toksBefore", toksBefore)
244           .add("toksAfter", toksAfter)
245           .toString();
246     }
247   }
248 
249   private final String text; // The input.
250   private int kN; // The number of numbered toks (tokens or comments), excluding the EOF.
251 
252   /*
253    * The following lists record the sequential indices of the {@code Tok}s on each input line. (Only
254    * tokens and comments have sequential indices.) Tokens and {@code //} comments lie on just one
255    * line; {@code /*} comments can lie on multiple lines. These data structures (along with
256    * equivalent ones for the formatted output) let us compute correspondences between the input and
257    * output.
258    */
259 
260   private final ImmutableMap<Integer, Integer> positionToColumnMap; // Map Tok position to column.
261   private final ImmutableList<Token> tokens; // The Tokens for this input.
262   private final ImmutableRangeMap<Integer, Token> positionTokenMap; // Map position to Token.
263 
264   /** Map from Tok index to the associated Token. */
265   private final Token[] kToToken;
266 
267   /**
268    * Input constructor.
269    *
270    * @param text the input text
271    * @throws FormatterException if the input cannot be parsed
272    */
JavaInput(String text)273   public JavaInput(String text) throws FormatterException {
274     this.text = checkNotNull(text);
275     setLines(ImmutableList.copyOf(Newlines.lineIterator(text)));
276     ImmutableList<Tok> toks = buildToks(text);
277     positionToColumnMap = makePositionToColumnMap(toks);
278     tokens = buildTokens(toks);
279     ImmutableRangeMap.Builder<Integer, Token> tokenLocations = ImmutableRangeMap.builder();
280     for (Token token : tokens) {
281       Input.Tok end = JavaOutput.endTok(token);
282       int upper = end.getPosition();
283       if (!end.getText().isEmpty()) {
284         upper += end.length() - 1;
285       }
286       tokenLocations.put(Range.closed(JavaOutput.startTok(token).getPosition(), upper), token);
287     }
288     positionTokenMap = tokenLocations.build();
289 
290     // adjust kN for EOF
291     kToToken = new Token[kN + 1];
292     for (Token token : tokens) {
293       for (Input.Tok tok : token.getToksBefore()) {
294         if (tok.getIndex() < 0) {
295           continue;
296         }
297         kToToken[tok.getIndex()] = token;
298       }
299       kToToken[token.getTok().getIndex()] = token;
300       for (Input.Tok tok : token.getToksAfter()) {
301         if (tok.getIndex() < 0) {
302           continue;
303         }
304         kToToken[tok.getIndex()] = token;
305       }
306     }
307   }
308 
makePositionToColumnMap(List<Tok> toks)309   private static ImmutableMap<Integer, Integer> makePositionToColumnMap(List<Tok> toks) {
310     ImmutableMap.Builder<Integer, Integer> builder = ImmutableMap.builder();
311     for (Tok tok : toks) {
312       builder.put(tok.getPosition(), tok.getColumn());
313     }
314     return builder.build();
315   }
316 
317   /**
318    * Get the input text.
319    *
320    * @return the input text
321    */
322   @Override
getText()323   public String getText() {
324     return text;
325   }
326 
327   @Override
getPositionToColumnMap()328   public ImmutableMap<Integer, Integer> getPositionToColumnMap() {
329     return positionToColumnMap;
330   }
331 
332   /** Lex the input and build the list of toks. */
buildToks(String text)333   private ImmutableList<Tok> buildToks(String text) throws FormatterException {
334     ImmutableList<Tok> toks = buildToks(text, ImmutableSet.of());
335     kN = getLast(toks).getIndex();
336     computeRanges(toks);
337     return toks;
338   }
339 
340   /**
341    * Lex the input and build the list of toks.
342    *
343    * @param text the text to be lexed.
344    * @param stopTokens a set of tokens which should cause lexing to stop. If one of these is found,
345    *     the returned list will include tokens up to but not including that token.
346    */
buildToks(String text, ImmutableSet<TokenKind> stopTokens)347   static ImmutableList<Tok> buildToks(String text, ImmutableSet<TokenKind> stopTokens)
348       throws FormatterException {
349     stopTokens = ImmutableSet.<TokenKind>builder().addAll(stopTokens).add(TokenKind.EOF).build();
350     Context context = new Context();
351     Options.instance(context).put("--enable-preview", "true");
352     new JavacFileManager(context, true, UTF_8);
353     DiagnosticCollector<JavaFileObject> diagnosticCollector = new DiagnosticCollector<>();
354     context.put(DiagnosticListener.class, diagnosticCollector);
355     Log log = Log.instance(context);
356     log.useSource(
357         new SimpleJavaFileObject(URI.create("Source.java"), Kind.SOURCE) {
358           @Override
359           public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException {
360             return text;
361           }
362         });
363     DeferredDiagnosticHandler diagnostics = new DeferredDiagnosticHandler(log);
364     ImmutableList<RawTok> rawToks = JavacTokens.getTokens(text, context, stopTokens);
365     if (diagnostics.getDiagnostics().stream().anyMatch(d -> d.getKind() == Diagnostic.Kind.ERROR)) {
366       return ImmutableList.of(new Tok(0, "", "", 0, 0, true, null)); // EOF
367     }
368     int kN = 0;
369     List<Tok> toks = new ArrayList<>();
370     int charI = 0;
371     int columnI = 0;
372     for (RawTok t : rawToks) {
373       if (stopTokens.contains(t.kind())) {
374         break;
375       }
376       int charI0 = t.pos();
377       // Get string, possibly with Unicode escapes.
378       String originalTokText = text.substring(charI0, t.endPos());
379       String tokText =
380           t.kind() == TokenKind.STRINGLITERAL
381               ? t.stringVal() // Unicode escapes removed.
382               : originalTokText;
383       char tokText0 = tokText.charAt(0); // The token's first character.
384       final boolean isToken; // Is this tok a token?
385       final boolean isNumbered; // Is this tok numbered? (tokens and comments)
386       String extraNewline = null; // Extra newline at end?
387       List<String> strings = new ArrayList<>();
388       if (Character.isWhitespace(tokText0)) {
389         isToken = false;
390         isNumbered = false;
391         Iterator<String> it = Newlines.lineIterator(originalTokText);
392         while (it.hasNext()) {
393           String line = it.next();
394           String newline = Newlines.getLineEnding(line);
395           if (newline != null) {
396             String spaces = line.substring(0, line.length() - newline.length());
397             if (!spaces.isEmpty()) {
398               strings.add(spaces);
399             }
400             strings.add(newline);
401           } else if (!line.isEmpty()) {
402             strings.add(line);
403           }
404         }
405       } else if (tokText.startsWith("'") || tokText.startsWith("\"")) {
406         isToken = true;
407         isNumbered = true;
408         strings.add(originalTokText);
409       } else if (tokText.startsWith("//") || tokText.startsWith("/*")) {
410         // For compatibility with an earlier lexer, the newline after a // comment is its own tok.
411         if (tokText.startsWith("//")
412             && (originalTokText.endsWith("\n") || originalTokText.endsWith("\r"))) {
413           extraNewline = Newlines.getLineEnding(originalTokText);
414           tokText = tokText.substring(0, tokText.length() - extraNewline.length());
415           originalTokText =
416               originalTokText.substring(0, originalTokText.length() - extraNewline.length());
417         }
418         isToken = false;
419         isNumbered = true;
420         strings.add(originalTokText);
421       } else if (Character.isJavaIdentifierStart(tokText0)
422           || Character.isDigit(tokText0)
423           || (tokText0 == '.' && tokText.length() > 1 && Character.isDigit(tokText.charAt(1)))) {
424         // Identifier, keyword, or numeric literal (a dot may begin a number, as in .2D).
425         isToken = true;
426         isNumbered = true;
427         strings.add(tokText);
428       } else {
429         // Other tokens ("+" or "++" or ">>" are broken into one-character toks, because ">>"
430         // cannot be lexed without syntactic knowledge. This implementation fails if the token
431         // contains Unicode escapes.
432         isToken = true;
433         isNumbered = true;
434         for (char c : tokText.toCharArray()) {
435           strings.add(String.valueOf(c));
436         }
437       }
438       if (strings.size() == 1) {
439         toks.add(
440             new Tok(
441                 isNumbered ? kN++ : -1,
442                 originalTokText,
443                 tokText,
444                 charI,
445                 columnI,
446                 isToken,
447                 t.kind()));
448         charI += originalTokText.length();
449         columnI = updateColumn(columnI, originalTokText);
450 
451       } else {
452         if (strings.size() != 1 && !tokText.equals(originalTokText)) {
453           throw new FormatterException(
454               "Unicode escapes not allowed in whitespace or multi-character operators");
455         }
456         for (String str : strings) {
457           toks.add(new Tok(isNumbered ? kN++ : -1, str, str, charI, columnI, isToken, null));
458           charI += str.length();
459           columnI = updateColumn(columnI, originalTokText);
460         }
461       }
462       if (extraNewline != null) {
463         toks.add(new Tok(-1, extraNewline, extraNewline, charI, columnI, false, null));
464         columnI = 0;
465         charI += extraNewline.length();
466       }
467     }
468     toks.add(new Tok(kN, "", "", charI, columnI, true, null)); // EOF tok.
469     return ImmutableList.copyOf(toks);
470   }
471 
updateColumn(int columnI, String originalTokText)472   private static int updateColumn(int columnI, String originalTokText) {
473     Integer last = Iterators.getLast(Newlines.lineOffsetIterator(originalTokText));
474     if (last > 0) {
475       columnI = originalTokText.length() - last;
476     } else {
477       columnI += originalTokText.length();
478     }
479     return columnI;
480   }
481 
buildTokens(List<Tok> toks)482   private static ImmutableList<Token> buildTokens(List<Tok> toks) {
483     ImmutableList.Builder<Token> tokens = ImmutableList.builder();
484     int k = 0;
485     int kN = toks.size();
486 
487     // Remaining non-tokens before the token go here.
488     ImmutableList.Builder<Tok> toksBefore = ImmutableList.builder();
489 
490     OUTERMOST:
491     while (k < kN) {
492       while (!toks.get(k).isToken()) {
493         Tok tok = toks.get(k++);
494         toksBefore.add(tok);
495         if (isParamComment(tok)) {
496           while (toks.get(k).isNewline()) {
497             // drop newlines after parameter comments
498             k++;
499           }
500         }
501       }
502       Tok tok = toks.get(k++);
503 
504       // Non-tokens starting on the same line go here too.
505       ImmutableList.Builder<Tok> toksAfter = ImmutableList.builder();
506       OUTER:
507       while (k < kN && !toks.get(k).isToken()) {
508         // Don't attach inline comments to certain leading tokens, e.g. for `f(/*flag1=*/true).
509         //
510         // Attaching inline comments to the right token is hard, and this barely
511         // scratches the surface. But it's enough to do a better job with parameter
512         // name comments.
513         //
514         // TODO(cushon): find a better strategy.
515         if (toks.get(k).isSlashStarComment()) {
516           switch (tok.getText()) {
517             case "(":
518             case "<":
519             case ".":
520               break OUTER;
521             default:
522               break;
523           }
524         }
525         if (toks.get(k).isJavadocComment()) {
526           switch (tok.getText()) {
527             case ";":
528               break OUTER;
529             default:
530               break;
531           }
532         }
533         if (isParamComment(toks.get(k))) {
534           tokens.add(new Token(toksBefore.build(), tok, toksAfter.build()));
535           toksBefore = ImmutableList.<Tok>builder().add(toks.get(k++));
536           // drop newlines after parameter comments
537           while (toks.get(k).isNewline()) {
538             k++;
539           }
540           continue OUTERMOST;
541         }
542         Tok nonTokenAfter = toks.get(k++);
543         toksAfter.add(nonTokenAfter);
544         if (Newlines.containsBreaks(nonTokenAfter.getText())) {
545           break;
546         }
547       }
548       tokens.add(new Token(toksBefore.build(), tok, toksAfter.build()));
549       toksBefore = ImmutableList.builder();
550     }
551     return tokens.build();
552   }
553 
isParamComment(Tok tok)554   private static boolean isParamComment(Tok tok) {
555     return tok.isSlashStarComment()
556         && tok.getText().matches("\\/\\*[A-Za-z0-9\\s_\\-]+=\\s*\\*\\/");
557   }
558 
559   /**
560    * Convert from an offset and length flag pair to a token range.
561    *
562    * @param offset the {@code 0}-based offset in characters
563    * @param length the length in characters
564    * @return the {@code 0}-based {@link Range} of tokens
565    * @throws FormatterException if offset + length is outside the file
566    */
characterRangeToTokenRange(int offset, int length)567   Range<Integer> characterRangeToTokenRange(int offset, int length) throws FormatterException {
568     int requiredLength = offset + length;
569     if (requiredLength > text.length()) {
570       throw new FormatterException(
571           String.format(
572               "error: invalid length %d, offset + length (%d) is outside the file",
573               length, requiredLength));
574     }
575     if (length < 0) {
576       return EMPTY_RANGE;
577     }
578     if (length == 0) {
579       // 0 stands for "format the line under the cursor"
580       length = 1;
581     }
582     ImmutableCollection<Token> enclosed =
583         getPositionTokenMap()
584             .subRangeMap(Range.closedOpen(offset, offset + length))
585             .asMapOfRanges()
586             .values();
587     if (enclosed.isEmpty()) {
588       return EMPTY_RANGE;
589     }
590     return Range.closedOpen(
591         enclosed.iterator().next().getTok().getIndex(), getLast(enclosed).getTok().getIndex() + 1);
592   }
593 
594   /**
595    * Get the number of toks.
596    *
597    * @return the number of toks, including the EOF tok
598    */
599   @Override
getkN()600   public int getkN() {
601     return kN;
602   }
603 
604   /**
605    * Get the Token by index.
606    *
607    * @param k the token index
608    */
609   @Override
getToken(int k)610   public Token getToken(int k) {
611     return kToToken[k];
612   }
613 
614   /**
615    * Get the input tokens.
616    *
617    * @return the input tokens
618    */
619   @Override
getTokens()620   public ImmutableList<? extends Input.Token> getTokens() {
621     return tokens;
622   }
623 
624   /**
625    * Get the navigable map from position to {@link Token}. Used to look for tokens following a given
626    * one, and to implement the --offset and --length flags to reformat a character range in the
627    * input file.
628    *
629    * @return the navigable map from position to {@link Token}
630    */
631   @Override
getPositionTokenMap()632   public ImmutableRangeMap<Integer, Token> getPositionTokenMap() {
633     return positionTokenMap;
634   }
635 
636   @Override
toString()637   public String toString() {
638     return MoreObjects.toStringHelper(this)
639         .add("tokens", tokens)
640         .add("super", super.toString())
641         .toString();
642   }
643 
644   private JCCompilationUnit unit;
645 
646   @Override
getLineNumber(int inputPosition)647   public int getLineNumber(int inputPosition) {
648     Verify.verifyNotNull(unit, "Expected compilation unit to be set.");
649     return unit.getLineMap().getLineNumber(inputPosition);
650   }
651 
652   @Override
getColumnNumber(int inputPosition)653   public int getColumnNumber(int inputPosition) {
654     Verify.verifyNotNull(unit, "Expected compilation unit to be set.");
655     return unit.getLineMap().getColumnNumber(inputPosition);
656   }
657 
658   // TODO(cushon): refactor JavaInput so the CompilationUnit can be passed into
659   // the constructor.
setCompilationUnit(JCCompilationUnit unit)660   public void setCompilationUnit(JCCompilationUnit unit) {
661     this.unit = unit;
662   }
663 
characterRangesToTokenRanges(Collection<Range<Integer>> characterRanges)664   public RangeSet<Integer> characterRangesToTokenRanges(Collection<Range<Integer>> characterRanges)
665       throws FormatterException {
666     RangeSet<Integer> tokenRangeSet = TreeRangeSet.create();
667     for (Range<Integer> characterRange0 : characterRanges) {
668       Range<Integer> characterRange = characterRange0.canonical(DiscreteDomain.integers());
669       tokenRangeSet.add(
670           characterRangeToTokenRange(
671               characterRange.lowerEndpoint(),
672               characterRange.upperEndpoint() - characterRange.lowerEndpoint()));
673     }
674     return tokenRangeSet;
675   }
676 }
677