1 /* 2 * Copyright 2016 Google Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 5 * in compliance with the License. You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software distributed under the License 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 11 * or implied. See the License for the specific language governing permissions and limitations under 12 * the License. 13 */ 14 15 package com.google.googlejavaformat.java; 16 17 import static com.google.common.base.Preconditions.checkArgument; 18 19 import com.google.common.collect.ImmutableList; 20 import com.google.common.collect.Lists; 21 import com.sun.tools.javac.parser.JavaTokenizer; 22 import com.sun.tools.javac.parser.Scanner; 23 import com.sun.tools.javac.parser.ScannerFactory; 24 import com.sun.tools.javac.parser.Tokens.Comment; 25 import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; 26 import com.sun.tools.javac.parser.Tokens.Token; 27 import com.sun.tools.javac.parser.Tokens.TokenKind; 28 import com.sun.tools.javac.parser.UnicodeReader; 29 import com.sun.tools.javac.util.Context; 30 import java.util.Set; 31 32 /** A wrapper around javac's lexer. */ 33 class JavacTokens { 34 35 /** The lexer eats terminal comments, so feed it one we don't care about. */ 36 // TODO(b/33103797): fix javac and remove the work-around 37 private static final CharSequence EOF_COMMENT = "\n//EOF"; 38 39 /** An unprocessed input token, including whitespace and comments. */ 40 static class RawTok { 41 private final String stringVal; 42 private final TokenKind kind; 43 private final int pos; 44 private final int endPos; 45 RawTok(String stringVal, TokenKind kind, int pos, int endPos)46 RawTok(String stringVal, TokenKind kind, int pos, int endPos) { 47 this.stringVal = stringVal; 48 this.kind = kind; 49 this.pos = pos; 50 this.endPos = endPos; 51 } 52 53 /** The token kind, or {@code null} for whitespace and comments. */ kind()54 public TokenKind kind() { 55 return kind; 56 } 57 58 /** The start position. */ pos()59 public int pos() { 60 return pos; 61 } 62 63 /** The end position. */ endPos()64 public int endPos() { 65 return endPos; 66 } 67 68 /** The escaped string value of a literal, or {@code null} for other tokens. */ stringVal()69 public String stringVal() { 70 return stringVal; 71 } 72 } 73 74 /** Lex the input and return a list of {@link RawTok}s. */ getTokens( String source, Context context, Set<TokenKind> stopTokens)75 public static ImmutableList<RawTok> getTokens( 76 String source, Context context, Set<TokenKind> stopTokens) { 77 if (source == null) { 78 return ImmutableList.of(); 79 } 80 ScannerFactory fac = ScannerFactory.instance(context); 81 char[] buffer = (source + EOF_COMMENT).toCharArray(); 82 Scanner scanner = 83 new AccessibleScanner(fac, new CommentSavingTokenizer(fac, buffer, buffer.length)); 84 ImmutableList.Builder<RawTok> tokens = ImmutableList.builder(); 85 int end = source.length(); 86 int last = 0; 87 do { 88 scanner.nextToken(); 89 Token t = scanner.token(); 90 if (t.comments != null) { 91 for (Comment c : Lists.reverse(t.comments)) { 92 if (last < c.getSourcePos(0)) { 93 tokens.add(new RawTok(null, null, last, c.getSourcePos(0))); 94 } 95 tokens.add( 96 new RawTok(null, null, c.getSourcePos(0), c.getSourcePos(0) + c.getText().length())); 97 last = c.getSourcePos(0) + c.getText().length(); 98 } 99 } 100 if (stopTokens.contains(t.kind)) { 101 if (t.kind != TokenKind.EOF) { 102 end = t.pos; 103 } 104 break; 105 } 106 if (last < t.pos) { 107 tokens.add(new RawTok(null, null, last, t.pos)); 108 } 109 tokens.add( 110 new RawTok( 111 t.kind == TokenKind.STRINGLITERAL ? "\"" + t.stringVal() + "\"" : null, 112 t.kind, 113 t.pos, 114 t.endPos)); 115 last = t.endPos; 116 } while (scanner.token().kind != TokenKind.EOF); 117 if (last < end) { 118 tokens.add(new RawTok(null, null, last, end)); 119 } 120 return tokens.build(); 121 } 122 123 /** A {@link JavaTokenizer} that saves comments. */ 124 static class CommentSavingTokenizer extends JavaTokenizer { CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length)125 CommentSavingTokenizer(ScannerFactory fac, char[] buffer, int length) { 126 super(fac, buffer, length); 127 } 128 129 @Override processComment(int pos, int endPos, CommentStyle style)130 protected Comment processComment(int pos, int endPos, CommentStyle style) { 131 char[] buf = getRawCharactersReflectively(pos, endPos); 132 return new CommentWithTextAndPosition( 133 pos, endPos, new AccessibleReader(fac, buf, buf.length), style); 134 } 135 getRawCharactersReflectively(int beginIndex, int endIndex)136 private char[] getRawCharactersReflectively(int beginIndex, int endIndex) { 137 Object instance; 138 try { 139 instance = JavaTokenizer.class.getDeclaredField("reader").get(this); 140 } catch (ReflectiveOperationException e) { 141 instance = this; 142 } 143 try { 144 return (char[]) 145 instance 146 .getClass() 147 .getMethod("getRawCharacters", int.class, int.class) 148 .invoke(instance, beginIndex, endIndex); 149 } catch (ReflectiveOperationException e) { 150 throw new LinkageError(e.getMessage(), e); 151 } 152 } 153 } 154 155 /** A {@link Comment} that saves its text and start position. */ 156 static class CommentWithTextAndPosition implements Comment { 157 158 private final int pos; 159 private final int endPos; 160 private final AccessibleReader reader; 161 private final CommentStyle style; 162 163 private String text = null; 164 CommentWithTextAndPosition( int pos, int endPos, AccessibleReader reader, CommentStyle style)165 public CommentWithTextAndPosition( 166 int pos, int endPos, AccessibleReader reader, CommentStyle style) { 167 this.pos = pos; 168 this.endPos = endPos; 169 this.reader = reader; 170 this.style = style; 171 } 172 173 /** 174 * Returns the source position of the character at index {@code index} in the comment text. 175 * 176 * <p>The handling of javadoc comments in javac has more logic to skip over leading whitespace 177 * and '*' characters when indexing into doc comments, but we don't need any of that. 178 */ 179 @Override getSourcePos(int index)180 public int getSourcePos(int index) { 181 checkArgument( 182 0 <= index && index < (endPos - pos), 183 "Expected %s in the range [0, %s)", 184 index, 185 endPos - pos); 186 return pos + index; 187 } 188 189 @Override getStyle()190 public CommentStyle getStyle() { 191 return style; 192 } 193 194 @Override getText()195 public String getText() { 196 String text = this.text; 197 if (text == null) { 198 this.text = text = new String(reader.getRawCharacters()); 199 } 200 return text; 201 } 202 203 /** 204 * We don't care about {@code @deprecated} javadoc tags (see the DepAnn check). 205 * 206 * @return false 207 */ 208 @Override isDeprecated()209 public boolean isDeprecated() { 210 return false; 211 } 212 213 @Override toString()214 public String toString() { 215 return String.format("Comment: '%s'", getText()); 216 } 217 } 218 219 // Scanner(ScannerFactory, JavaTokenizer) is package-private 220 static class AccessibleScanner extends Scanner { AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer)221 protected AccessibleScanner(ScannerFactory fac, JavaTokenizer tokenizer) { 222 super(fac, tokenizer); 223 } 224 } 225 226 // UnicodeReader(ScannerFactory, char[], int) is package-private 227 static class AccessibleReader extends UnicodeReader { AccessibleReader(ScannerFactory fac, char[] buffer, int length)228 protected AccessibleReader(ScannerFactory fac, char[] buffer, int length) { 229 super(fac, buffer, length); 230 } 231 } 232 } 233