• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.googlejavaformat.java;
16 
17 import static com.google.common.base.Preconditions.checkArgument;
18 import static java.util.Arrays.stream;
19 
20 import com.google.common.collect.ImmutableList;
21 import com.google.common.collect.Lists;
22 import com.sun.tools.javac.parser.JavaTokenizer;
23 import com.sun.tools.javac.parser.Scanner;
24 import com.sun.tools.javac.parser.ScannerFactory;
25 import com.sun.tools.javac.parser.Tokens.Comment;
26 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
27 import com.sun.tools.javac.parser.Tokens.Token;
28 import com.sun.tools.javac.parser.Tokens.TokenKind;
29 import com.sun.tools.javac.parser.UnicodeReader;
30 import com.sun.tools.javac.util.Context;
31 import java.util.Objects;
32 import java.util.Set;
33 
34 /** A wrapper around javac's lexer. */
35 class JavacTokens {
36 
37   /** The lexer eats terminal comments, so feed it one we don't care about. */
38   // TODO(b/33103797): fix javac and remove the work-around
39   private static final CharSequence EOF_COMMENT = "\n//EOF";
40 
41   /** An unprocessed input token, including whitespace and comments. */
42   static class RawTok {
43     private final String stringVal;
44     private final TokenKind kind;
45     private final int pos;
46     private final int endPos;
47 
RawTok(String stringVal, TokenKind kind, int pos, int endPos)48     RawTok(String stringVal, TokenKind kind, int pos, int endPos) {
49       this.stringVal = stringVal;
50       this.kind = kind;
51       this.pos = pos;
52       this.endPos = endPos;
53     }
54 
55     /** The token kind, or {@code null} for whitespace and comments. */
kind()56     public TokenKind kind() {
57       return kind;
58     }
59 
60     /** The start position. */
pos()61     public int pos() {
62       return pos;
63     }
64 
65     /** The end position. */
endPos()66     public int endPos() {
67       return endPos;
68     }
69 
70     /** The escaped string value of a literal, or {@code null} for other tokens. */
stringVal()71     public String stringVal() {
72       return stringVal;
73     }
74   }
75 
76   private static final TokenKind STRINGFRAGMENT =
77       stream(TokenKind.values())
78           .filter(t -> t.name().contentEquals("STRINGFRAGMENT"))
79           .findFirst()
80           .orElse(null);
81 
isStringFragment(TokenKind kind)82   static boolean isStringFragment(TokenKind kind) {
83     return STRINGFRAGMENT != null && Objects.equals(kind, STRINGFRAGMENT);
84   }
85 
86   /** Lex the input and return a list of {@link RawTok}s. */
getTokens( String source, Context context, Set<TokenKind> stopTokens)87   public static ImmutableList<RawTok> getTokens(
88       String source, Context context, Set<TokenKind> stopTokens) {
89     if (source == null) {
90       return ImmutableList.of();
91     }
92     ScannerFactory fac = ScannerFactory.instance(context);
93     char[] buffer = (source + EOF_COMMENT).toCharArray();
94     Scanner scanner =
95         new AccessibleScanner(fac, new CommentSavingTokenizer(fac, buffer, buffer.length));
96     ImmutableList.Builder<RawTok> tokens = ImmutableList.builder();
97     int end = source.length();
98     int last = 0;
99     do {
100       scanner.nextToken();
101       Token t = scanner.token();
102       if (t.comments != null) {
103         for (Comment c : Lists.reverse(t.comments)) {
104           if (last < c.getSourcePos(0)) {
105             tokens.add(new RawTok(null, null, last, c.getSourcePos(0)));
106           }
107           tokens.add(
108               new RawTok(null, null, c.getSourcePos(0), c.getSourcePos(0) + c.getText().length()));
109           last = c.getSourcePos(0) + c.getText().length();
110         }
111       }
112       if (stopTokens.contains(t.kind)) {
113         if (t.kind != TokenKind.EOF) {
114           end = t.pos;
115         }
116         break;
117       }
118       if (last < t.pos) {
119         tokens.add(new RawTok(null, null, last, t.pos));
120       }
121       int pos = t.pos;
122       int endPos = t.endPos;
123       if (isStringFragment(t.kind)) {
124         // A string template is tokenized as a series of STRINGFRAGMENT tokens containing the string
125         // literal values, followed by the tokens for the template arguments. For the formatter, we
126         // want the stream of tokens to appear in order by their start position, and also to have
127         // all the content from the original source text (including leading and trailing ", and the
128         // \ escapes from template arguments). This logic processes the token stream from javac to
129         // meet those requirements.
130         while (isStringFragment(t.kind)) {
131           endPos = t.endPos;
132           scanner.nextToken();
133           t = scanner.token();
134         }
135         // Read tokens for the string template arguments, until we read the end of the string
136         // template. The last token in a string template is always a trailing string fragment. Use
137         // lookahead to defer reading the token after the template until the next iteration of the
138         // outer loop.
139         while (scanner.token(/* lookahead= */ 1).endPos < endPos) {
140           scanner.nextToken();
141           t = scanner.token();
142         }
143         tokens.add(new RawTok(source.substring(pos, endPos), t.kind, pos, endPos));
144         last = endPos;
145       } else {
146         tokens.add(
147             new RawTok(
148                 t.kind == TokenKind.STRINGLITERAL ? "\"" + t.stringVal() + "\"" : null,
149                 t.kind,
150                 t.pos,
151                 t.endPos));
152         last = t.endPos;
153       }
154     } while (scanner.token().kind != TokenKind.EOF);
155     if (last < end) {
156       tokens.add(new RawTok(null, null, last, end));
157     }
158     return tokens.build();
159   }
160 
161   /** A {@link JavaTokenizer} that saves comments. */
162   static class CommentSavingTokenizer extends JavaTokenizer {
CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length)163     CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length) {
164       super(fac, buffer, length);
165     }
166 
167     @Override
processComment(int pos, int endPos, CommentStyle style)168     protected Comment processComment(int pos, int endPos, CommentStyle style) {
169       char[] buf = getRawCharactersReflectively(pos, endPos);
170       return new CommentWithTextAndPosition(
171           pos, endPos, new AccessibleReader(fac, buf, buf.length), style);
172     }
173 
getRawCharactersReflectively(int beginIndex, int endIndex)174     private char[] getRawCharactersReflectively(int beginIndex, int endIndex) {
175       Object instance;
176       try {
177         instance = JavaTokenizer.class.getDeclaredField("reader").get(this);
178       } catch (ReflectiveOperationException e) {
179         instance = this;
180       }
181       try {
182         return (char[])
183             instance
184                 .getClass()
185                 .getMethod("getRawCharacters", int.class, int.class)
186                 .invoke(instance, beginIndex, endIndex);
187       } catch (ReflectiveOperationException e) {
188         throw new LinkageError(e.getMessage(), e);
189       }
190     }
191   }
192 
193   /** A {@link Comment} that saves its text and start position. */
194   static class CommentWithTextAndPosition implements Comment {
195 
196     private final int pos;
197     private final int endPos;
198     private final AccessibleReader reader;
199     private final CommentStyle style;
200 
201     private String text = null;
202 
CommentWithTextAndPosition( int pos, int endPos, AccessibleReader reader, CommentStyle style)203     public CommentWithTextAndPosition(
204         int pos, int endPos, AccessibleReader reader, CommentStyle style) {
205       this.pos = pos;
206       this.endPos = endPos;
207       this.reader = reader;
208       this.style = style;
209     }
210 
211     /**
212      * Returns the source position of the character at index {@code index} in the comment text.
213      *
214      * <p>The handling of javadoc comments in javac has more logic to skip over leading whitespace
215      * and '*' characters when indexing into doc comments, but we don't need any of that.
216      */
217     @Override
getSourcePos(int index)218     public int getSourcePos(int index) {
219       checkArgument(
220           0 <= index && index < (endPos - pos),
221           "Expected %s in the range [0, %s)",
222           index,
223           endPos - pos);
224       return pos + index;
225     }
226 
227     @Override
getStyle()228     public CommentStyle getStyle() {
229       return style;
230     }
231 
232     @Override
getText()233     public String getText() {
234       String text = this.text;
235       if (text == null) {
236         this.text = text = new String(reader.getRawCharacters());
237       }
238       return text;
239     }
240 
241     /**
242      * We don't care about {@code @deprecated} javadoc tags (see the DepAnn check).
243      *
244      * @return false
245      */
246     @Override
isDeprecated()247     public boolean isDeprecated() {
248       return false;
249     }
250 
251     @Override
toString()252     public String toString() {
253       return String.format("Comment: '%s'", getText());
254     }
255   }
256 
257   // Scanner(ScannerFactory, JavaTokenizer) is package-private
258   static class AccessibleScanner extends Scanner {
AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer)259     protected AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer) {
260       super(fac, tokenizer);
261     }
262   }
263 
264   // UnicodeReader(ScannerFactory, char[], int) is package-private
265   static class AccessibleReader extends UnicodeReader {
AccessibleReader(ScannerFactory fac, char[] buffer, int length)266     protected AccessibleReader(ScannerFactory fac, char[] buffer, int length) {
267       super(fac, buffer, length);
268     }
269   }
270 }
271