• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5  * in compliance with the License. You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software distributed under the License
10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11  * or implied. See the License for the specific language governing permissions and limitations under
12  * the License.
13  */
14 
15 package com.google.googlejavaformat.java;
16 
17 import static com.google.common.base.Preconditions.checkArgument;
18 
19 import com.google.common.collect.ImmutableList;
20 import com.google.common.collect.Lists;
21 import com.sun.tools.javac.parser.JavaTokenizer;
22 import com.sun.tools.javac.parser.Scanner;
23 import com.sun.tools.javac.parser.ScannerFactory;
24 import com.sun.tools.javac.parser.Tokens.Comment;
25 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
26 import com.sun.tools.javac.parser.Tokens.Token;
27 import com.sun.tools.javac.parser.Tokens.TokenKind;
28 import com.sun.tools.javac.parser.UnicodeReader;
29 import com.sun.tools.javac.util.Context;
30 import java.util.Set;
31 
32 /** A wrapper around javac's lexer. */
33 class JavacTokens {
34 
35   /** The lexer eats terminal comments, so feed it one we don't care about. */
36   // TODO(b/33103797): fix javac and remove the work-around
37   private static final CharSequence EOF_COMMENT = "\n//EOF";
38 
39   /** An unprocessed input token, including whitespace and comments. */
40   static class RawTok {
41     private final String stringVal;
42     private final TokenKind kind;
43     private final int pos;
44     private final int endPos;
45 
RawTok(String stringVal, TokenKind kind, int pos, int endPos)46     RawTok(String stringVal, TokenKind kind, int pos, int endPos) {
47       this.stringVal = stringVal;
48       this.kind = kind;
49       this.pos = pos;
50       this.endPos = endPos;
51     }
52 
53     /** The token kind, or {@code null} for whitespace and comments. */
kind()54     public TokenKind kind() {
55       return kind;
56     }
57 
58     /** The start position. */
pos()59     public int pos() {
60       return pos;
61     }
62 
63     /** The end position. */
endPos()64     public int endPos() {
65       return endPos;
66     }
67 
68     /** The escaped string value of a literal, or {@code null} for other tokens. */
stringVal()69     public String stringVal() {
70       return stringVal;
71     }
72   }
73 
74   /** Lex the input and return a list of {@link RawTok}s. */
getTokens( String source, Context context, Set<TokenKind> stopTokens)75   public static ImmutableList<RawTok> getTokens(
76       String source, Context context, Set<TokenKind> stopTokens) {
77     if (source == null) {
78       return ImmutableList.of();
79     }
80     ScannerFactory fac = ScannerFactory.instance(context);
81     char[] buffer = (source + EOF_COMMENT).toCharArray();
82     Scanner scanner =
83         new AccessibleScanner(fac, new CommentSavingTokenizer(fac, buffer, buffer.length));
84     ImmutableList.Builder<RawTok> tokens = ImmutableList.builder();
85     int end = source.length();
86     int last = 0;
87     do {
88       scanner.nextToken();
89       Token t = scanner.token();
90       if (t.comments != null) {
91         for (Comment c : Lists.reverse(t.comments)) {
92           if (last < c.getSourcePos(0)) {
93             tokens.add(new RawTok(null, null, last, c.getSourcePos(0)));
94           }
95           tokens.add(
96               new RawTok(null, null, c.getSourcePos(0), c.getSourcePos(0) + c.getText().length()));
97           last = c.getSourcePos(0) + c.getText().length();
98         }
99       }
100       if (stopTokens.contains(t.kind)) {
101         if (t.kind != TokenKind.EOF) {
102           end = t.pos;
103         }
104         break;
105       }
106       if (last < t.pos) {
107         tokens.add(new RawTok(null, null, last, t.pos));
108       }
109       tokens.add(
110           new RawTok(
111               t.kind == TokenKind.STRINGLITERAL ? "\"" + t.stringVal() + "\"" : null,
112               t.kind,
113               t.pos,
114               t.endPos));
115       last = t.endPos;
116     } while (scanner.token().kind != TokenKind.EOF);
117     if (last < end) {
118       tokens.add(new RawTok(null, null, last, end));
119     }
120     return tokens.build();
121   }
122 
123   /** A {@link JavaTokenizer} that saves comments. */
124   static class CommentSavingTokenizer extends JavaTokenizer {
CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length)125     CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length) {
126       super(fac, buffer, length);
127     }
128 
129     @Override
processComment(int pos, int endPos, CommentStyle style)130     protected Comment processComment(int pos, int endPos, CommentStyle style) {
131       char[] buf = getRawCharactersReflectively(pos, endPos);
132       return new CommentWithTextAndPosition(
133           pos, endPos, new AccessibleReader(fac, buf, buf.length), style);
134     }
135 
getRawCharactersReflectively(int beginIndex, int endIndex)136     private char[] getRawCharactersReflectively(int beginIndex, int endIndex) {
137       Object instance;
138       try {
139         instance = JavaTokenizer.class.getDeclaredField("reader").get(this);
140       } catch (ReflectiveOperationException e) {
141         instance = this;
142       }
143       try {
144         return (char[])
145             instance
146                 .getClass()
147                 .getMethod("getRawCharacters", int.class, int.class)
148                 .invoke(instance, beginIndex, endIndex);
149       } catch (ReflectiveOperationException e) {
150         throw new LinkageError(e.getMessage(), e);
151       }
152     }
153   }
154 
155   /** A {@link Comment} that saves its text and start position. */
156   static class CommentWithTextAndPosition implements Comment {
157 
158     private final int pos;
159     private final int endPos;
160     private final AccessibleReader reader;
161     private final CommentStyle style;
162 
163     private String text = null;
164 
CommentWithTextAndPosition( int pos, int endPos, AccessibleReader reader, CommentStyle style)165     public CommentWithTextAndPosition(
166         int pos, int endPos, AccessibleReader reader, CommentStyle style) {
167       this.pos = pos;
168       this.endPos = endPos;
169       this.reader = reader;
170       this.style = style;
171     }
172 
173     /**
174      * Returns the source position of the character at index {@code index} in the comment text.
175      *
176      * <p>The handling of javadoc comments in javac has more logic to skip over leading whitespace
177      * and '*' characters when indexing into doc comments, but we don't need any of that.
178      */
179     @Override
getSourcePos(int index)180     public int getSourcePos(int index) {
181       checkArgument(
182           0 <= index && index < (endPos - pos),
183           "Expected %s in the range [0, %s)",
184           index,
185           endPos - pos);
186       return pos + index;
187     }
188 
189     @Override
getStyle()190     public CommentStyle getStyle() {
191       return style;
192     }
193 
194     @Override
getText()195     public String getText() {
196       String text = this.text;
197       if (text == null) {
198         this.text = text = new String(reader.getRawCharacters());
199       }
200       return text;
201     }
202 
203     /**
204      * We don't care about {@code @deprecated} javadoc tags (see the DepAnn check).
205      *
206      * @return false
207      */
208     @Override
isDeprecated()209     public boolean isDeprecated() {
210       return false;
211     }
212 
213     @Override
toString()214     public String toString() {
215       return String.format("Comment: '%s'", getText());
216     }
217   }
218 
  /**
   * A {@link Scanner} subclass whose only purpose is to make the superclass constructor {@code
   * Scanner(ScannerFactory, JavaTokenizer)} callable from this file.
   */
  // Scanner(ScannerFactory, JavaTokenizer) is not public, so it can only be reached through a
  // subclass. NOTE(review): presumably protected rather than package-private, since this class
  // lives in a different package and still calls it via super(...) - confirm against javac.
  static class AccessibleScanner extends Scanner {
    protected AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer) {
      super(fac, tokenizer);
    }
  }
225 
  /**
   * A {@link UnicodeReader} subclass whose only purpose is to make the superclass constructor
   * {@code UnicodeReader(ScannerFactory, char[], int)} callable from this file.
   */
  // UnicodeReader(ScannerFactory, char[], int) is not public, so it can only be reached through
  // a subclass. NOTE(review): presumably protected rather than package-private, since this class
  // lives in a different package and still calls it via super(...) - confirm against javac.
  static class AccessibleReader extends UnicodeReader {
    protected AccessibleReader(ScannerFactory fac, char[] buffer, int length) {
      super(fac, buffer, length);
    }
  }
232 }
233