1 /* 2 * Copyright 2016 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.googlejavaformat.java; 16 17 import static com.google.common.base.Preconditions.checkArgument; 18 import static java.util.Arrays.stream; 19 20 import com.google.common.collect.ImmutableList; 21 import com.google.common.collect.Lists; 22 import com.sun.tools.javac.parser.JavaTokenizer; 23 import com.sun.tools.javac.parser.Scanner; 24 import com.sun.tools.javac.parser.ScannerFactory; 25 import com.sun.tools.javac.parser.Tokens.Comment; 26 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 27 import com.sun.tools.javac.parser.Tokens.Token; 28 import com.sun.tools.javac.parser.Tokens.TokenKind; 29 import com.sun.tools.javac.parser.UnicodeReader; 30 import com.sun.tools.javac.util.Context; 31 import java.util.Objects; 32 import java.util.Set; 33 34 /** A wrapper around javac's lexer. */ 35 class JavacTokens { 36 37 /** The lexer eats terminal comments, so feed it one we don't care about. */ 38 // TODO(b/33103797): fix javac and remove the work-around 39 private static final CharSequence EOF_COMMENT = "\n//EOF"; 40 41 /** An unprocessed input token, including whitespace and comments. */ 42 static class RawTok { 43 private final String stringVal; 44 private final TokenKind kind; 45 private final int pos; 46 private final int endPos; 47 RawTok(String stringVal, TokenKind kind, int pos, int endPos)48 RawTok(String stringVal, TokenKind kind, int pos, int endPos) { 49 this.stringVal = stringVal; 50 this.kind = kind; 51 this.pos = pos; 52 this.endPos = endPos; 53 } 54 55 /** The token kind, or {@code null} for whitespace and comments. */ kind()56 public TokenKind kind() { 57 return kind; 58 } 59 60 /** The start position. */ pos()61 public int pos() { 62 return pos; 63 } 64 65 /** The end position. */ endPos()66 public int endPos() { 67 return endPos; 68 } 69 70 /** The escaped string value of a literal, or {@code null} for other tokens. */ stringVal()71 public String stringVal() { 72 return stringVal; 73 } 74 } 75 76 private static final TokenKind STRINGFRAGMENT = 77 stream(TokenKind.values()) 78 .filter(t -> t.name().contentEquals("STRINGFRAGMENT")) 79 .findFirst() 80 .orElse(null); 81 isStringFragment(TokenKind kind)82 static boolean isStringFragment(TokenKind kind) { 83 return STRINGFRAGMENT != null && Objects.equals(kind, STRINGFRAGMENT); 84 } 85 86 /** Lex the input and return a list of {@link RawTok}s. */ getTokens( String source, Context context, Set<TokenKind> stopTokens)87 public static ImmutableList<RawTok> getTokens( 88 String source, Context context, Set<TokenKind> stopTokens) { 89 if (source == null) { 90 return ImmutableList.of(); 91 } 92 ScannerFactory fac = ScannerFactory.instance(context); 93 char[] buffer = (source + EOF_COMMENT).toCharArray(); 94 Scanner scanner = 95 new AccessibleScanner(fac, new CommentSavingTokenizer(fac, buffer, buffer.length)); 96 ImmutableList.Builder<RawTok> tokens = ImmutableList.builder(); 97 int end = source.length(); 98 int last = 0; 99 do { 100 scanner.nextToken(); 101 Token t = scanner.token(); 102 if (t.comments != null) { 103 for (Comment c : Lists.reverse(t.comments)) { 104 if (last < c.getSourcePos(0)) { 105 tokens.add(new RawTok(null, null, last, c.getSourcePos(0))); 106 } 107 tokens.add( 108 new RawTok(null, null, c.getSourcePos(0), c.getSourcePos(0) + c.getText().length())); 109 last = c.getSourcePos(0) + c.getText().length(); 110 } 111 } 112 if (stopTokens.contains(t.kind)) { 113 if (t.kind != TokenKind.EOF) { 114 end = t.pos; 115 } 116 break; 117 } 118 if (last < t.pos) { 119 tokens.add(new RawTok(null, null, last, t.pos)); 120 } 121 int pos = t.pos; 122 int endPos = t.endPos; 123 if (isStringFragment(t.kind)) { 124 // A string template is tokenized as a series of STRINGFRAGMENT tokens containing the string 125 // literal values, followed by the tokens for the template arguments. For the formatter, we 126 // want the stream of tokens to appear in order by their start position, and also to have 127 // all the content from the original source text (including leading and trailing ", and the 128 // \ escapes from template arguments). This logic processes the token stream from javac to 129 // meet those requirements. 130 while (isStringFragment(t.kind)) { 131 endPos = t.endPos; 132 scanner.nextToken(); 133 t = scanner.token(); 134 } 135 // Read tokens for the string template arguments, until we read the end of the string 136 // template. The last token in a string template is always a trailing string fragment. Use 137 // lookahead to defer reading the token after the template until the next iteration of the 138 // outer loop. 139 while (scanner.token(/* lookahead= */ 1).endPos < endPos) { 140 scanner.nextToken(); 141 t = scanner.token(); 142 } 143 tokens.add(new RawTok(source.substring(pos, endPos), t.kind, pos, endPos)); 144 last = endPos; 145 } else { 146 tokens.add( 147 new RawTok( 148 t.kind == TokenKind.STRINGLITERAL ? "\"" + t.stringVal() + "\"" : null, 149 t.kind, 150 t.pos, 151 t.endPos)); 152 last = t.endPos; 153 } 154 } while (scanner.token().kind != TokenKind.EOF); 155 if (last < end) { 156 tokens.add(new RawTok(null, null, last, end)); 157 } 158 return tokens.build(); 159 } 160 161 /** A {@link JavaTokenizer} that saves comments. */ 162 static class CommentSavingTokenizer extends JavaTokenizer { CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length)163 CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length) { 164 super(fac, buffer, length); 165 } 166 167 @Override processComment(int pos, int endPos, CommentStyle style)168 protected Comment processComment(int pos, int endPos, CommentStyle style) { 169 char[] buf = getRawCharactersReflectively(pos, endPos); 170 return new CommentWithTextAndPosition( 171 pos, endPos, new AccessibleReader(fac, buf, buf.length), style); 172 } 173 getRawCharactersReflectively(int beginIndex, int endIndex)174 private char[] getRawCharactersReflectively(int beginIndex, int endIndex) { 175 Object instance; 176 try { 177 instance = JavaTokenizer.class.getDeclaredField("reader").get(this); 178 } catch (ReflectiveOperationException e) { 179 instance = this; 180 } 181 try { 182 return (char[]) 183 instance 184 .getClass() 185 .getMethod("getRawCharacters", int.class, int.class) 186 .invoke(instance, beginIndex, endIndex); 187 } catch (ReflectiveOperationException e) { 188 throw new LinkageError(e.getMessage(), e); 189 } 190 } 191 } 192 193 /** A {@link Comment} that saves its text and start position. */ 194 static class CommentWithTextAndPosition implements Comment { 195 196 private final int pos; 197 private final int endPos; 198 private final AccessibleReader reader; 199 private final CommentStyle style; 200 201 private String text = null; 202 CommentWithTextAndPosition( int pos, int endPos, AccessibleReader reader, CommentStyle style)203 public CommentWithTextAndPosition( 204 int pos, int endPos, AccessibleReader reader, CommentStyle style) { 205 this.pos = pos; 206 this.endPos = endPos; 207 this.reader = reader; 208 this.style = style; 209 } 210 211 /** 212 * Returns the source position of the character at index {@code index} in the comment text. 213 * 214 * <p>The handling of javadoc comments in javac has more logic to skip over leading whitespace 215 * and '*' characters when indexing into doc comments, but we don't need any of that. 216 */ 217 @Override getSourcePos(int index)218 public int getSourcePos(int index) { 219 checkArgument( 220 0 <= index && index < (endPos - pos), 221 "Expected %s in the range [0, %s)", 222 index, 223 endPos - pos); 224 return pos + index; 225 } 226 227 @Override getStyle()228 public CommentStyle getStyle() { 229 return style; 230 } 231 232 @Override getText()233 public String getText() { 234 String text = this.text; 235 if (text == null) { 236 this.text = text = new String(reader.getRawCharacters()); 237 } 238 return text; 239 } 240 241 /** 242 * We don't care about {@code @deprecated} javadoc tags (see the DepAnn check). 243 * 244 * @return false 245 */ 246 @Override isDeprecated()247 public boolean isDeprecated() { 248 return false; 249 } 250 251 @Override toString()252 public String toString() { 253 return String.format("Comment: '%s'", getText()); 254 } 255 } 256 257 // Scanner(ScannerFactory, JavaTokenizer) is package-private 258 static class AccessibleScanner extends Scanner { AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer)259 protected AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer) { 260 super(fac, tokenizer); 261 } 262 } 263 264 // UnicodeReader(ScannerFactory, char[], int) is package-private 265 static class AccessibleReader extends UnicodeReader { AccessibleReader(ScannerFactory fac, char[] buffer, int length)266 protected AccessibleReader(ScannerFactory fac, char[] buffer, int length) { 267 super(fac, buffer, length); 268 } 269 } 270 } 271