1 /* 2 * Copyright 2016 Google Inc. All Rights Reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.turbine.parse; 18 19 import com.google.common.collect.ImmutableList; 20 import com.google.errorprone.annotations.CheckReturnValue; 21 import com.google.turbine.diag.TurbineError; 22 import com.google.turbine.diag.TurbineError.ErrorKind; 23 import java.util.ArrayDeque; 24 import java.util.ArrayList; 25 import java.util.List; 26 27 /** 28 * Pre-process variable initializer expressions to handle multi-variable declarations. 29 * 30 * <p>Turbine needs to be able to parse compile-time constant expressions in constant variable 31 * intializers and annotations. Parsing JLS 15.28 constant expressions is much easier than parsing 32 * the full expression language, so we pre-process variable initializers to extract the expression 33 * and then parse it with an simple constant expression parser that fails if it sees an expression 34 * it doesn't understand. 35 * 36 * <p>To extract the (possibly constant) expression, we can usually just scan ahead to the 37 * semi-colon at the end of the variable. To avoid matching on semi-colons inside lambdas or 38 * anonymous class declarations, the preprocessor also matches braces. 39 * 40 * <p>That handles everything except multi-variable declarations (int x = 1, y = 2;), which in 41 * hindsight were probably a mistake. Multi-variable declarations contain a list of name and 42 * initializer pairs separated by commas. The initializer expressions may also contain commas, so 43 * it's non-trivial to split on initializer boundaries. For example, consider {@code int x = a < b, 44 * c = d;}. We can't tell looking at the prefix {@code a < b, c} whether that's a less-than 45 * expression followed by another initializer, or the start of a generic type: {@code a<b, c>.foo(}. 46 * Distinguishing between these cases requires arbitrary lookahead. 47 * 48 * <p>The preprocessor seems to be operationally correct. It's possible there are edge cases that it 49 * doesn't handle, but it's extremely rare for compile-time constant multi-variable declarations to 50 * contain complex generics. Multi-variable declarations are also disallowed by the Style guide. 51 */ 52 public class VariableInitializerParser { 53 54 enum FieldInitState { 55 /** The beginning of an initializer expression. */ 56 START, 57 /** The state after `<identifier> <`. */ 58 TYPE, 59 } 60 61 /** Indices into {@code LT} tokens used for backtracking. */ 62 final ArrayDeque<Integer> ltIndices = new ArrayDeque<>(); 63 64 /** Indices into {@code commas} used for backtracking. */ 65 final ArrayDeque<Integer> commaIndices = new ArrayDeque<>(); 66 67 /** The saved tokens. */ 68 List<SavedToken> tokens = new ArrayList<>(); 69 70 /** 71 * Indices of boundaries between variable initializers in {@code tokens} (which are indicated by 72 * commas in the input). 73 */ 74 List<Integer> commas = new ArrayList<>(); 75 76 public Token token; 77 FieldInitState state = FieldInitState.START; 78 int depth = 0; 79 80 final Lexer lexer; 81 VariableInitializerParser(Token token, Lexer lexer)82 public VariableInitializerParser(Token token, Lexer lexer) { 83 this.token = token; 84 this.lexer = lexer; 85 } 86 next()87 private void next() { 88 token = lexer.next(); 89 } 90 91 /** Returns lists of tokens for individual initializers in a (mutli-)variable initializer. */ parseInitializers()92 public List<List<SavedToken>> parseInitializers() { 93 OUTER: 94 while (true) { 95 switch (token) { 96 case IDENT: 97 save(); 98 next(); 99 if (state == FieldInitState.START) { 100 if (token == Token.LT) { 101 state = FieldInitState.TYPE; 102 depth = 1; 103 ltIndices.clear(); 104 commaIndices.clear(); 105 ltIndices.addLast(tokens.size()); 106 commaIndices.addLast(commas.size()); 107 save(); 108 next(); 109 break; 110 } 111 } 112 break; 113 case LT: 114 if (state == FieldInitState.TYPE) { 115 depth++; 116 ltIndices.addLast(tokens.size()); 117 commaIndices.addLast(commas.size()); 118 } 119 save(); 120 next(); 121 break; 122 case GTGTGT: 123 save(); 124 next(); 125 dropBracks(3); 126 break; 127 case GTGT: 128 save(); 129 next(); 130 dropBracks(2); 131 break; 132 case GT: 133 save(); 134 next(); 135 dropBracks(1); 136 break; 137 case LPAREN: 138 save(); 139 next(); 140 dropParens(); 141 break; 142 case LBRACE: 143 save(); 144 next(); 145 dropBraces(); 146 break; 147 case SEMI: 148 switch (state) { 149 case START: 150 case TYPE: 151 break OUTER; 152 } 153 save(); 154 next(); 155 break; 156 case COMMA: 157 save(); 158 next(); 159 switch (state) { 160 case START: 161 case TYPE: 162 commas.add(tokens.size()); 163 break; 164 } 165 break; 166 case DOT: 167 save(); 168 next(); 169 dropTypeArguments(); 170 break; 171 case NEW: 172 save(); 173 next(); 174 dropTypeArguments(); 175 while (token == Token.IDENT) { 176 save(); 177 next(); 178 dropTypeArguments(); 179 if (token == Token.DOT) { 180 next(); 181 } else { 182 break; 183 } 184 } 185 break; 186 case COLONCOLON: 187 save(); 188 next(); 189 dropTypeArguments(); 190 if (token == Token.NEW) { 191 next(); 192 } 193 break; 194 case EOF: 195 break OUTER; 196 default: 197 save(); 198 next(); 199 break; 200 } 201 } 202 List<List<SavedToken>> result = new ArrayList<>(); 203 int start = 0; 204 for (int idx : commas) { 205 result.add( 206 ImmutableList.<SavedToken>builder() 207 .addAll(tokens.subList(start, idx - 1)) 208 .add(new SavedToken(Token.EOF, null, tokens.get(idx - 1).position)) 209 .build()); 210 start = idx; 211 } 212 result.add( 213 ImmutableList.<SavedToken>builder() 214 .addAll(tokens.subList(start, tokens.size())) 215 .add(new SavedToken(Token.EOF, null, lexer.position())) 216 .build()); 217 return result; 218 } 219 dropParens()220 private void dropParens() { 221 int depth = 1; 222 while (depth > 0) { 223 switch (token) { 224 case LPAREN: 225 save(); 226 next(); 227 depth++; 228 break; 229 case RPAREN: 230 save(); 231 next(); 232 depth--; 233 break; 234 case EOF: 235 throw error(ErrorKind.UNEXPECTED_EOF); 236 default: 237 save(); 238 next(); 239 break; 240 } 241 } 242 } 243 dropBraces()244 private void dropBraces() { 245 int depth = 1; 246 while (depth > 0) { 247 switch (token) { 248 case LBRACE: 249 save(); 250 next(); 251 depth++; 252 break; 253 case RBRACE: 254 save(); 255 next(); 256 depth--; 257 break; 258 case EOF: 259 throw error(ErrorKind.UNEXPECTED_EOF); 260 default: 261 save(); 262 next(); 263 break; 264 } 265 } 266 } 267 save()268 private void save() { 269 tokens.add(new SavedToken(token, lexer.stringValue(), lexer.position())); 270 } 271 dropBracks(int many)272 private void dropBracks(int many) { 273 if (state != FieldInitState.TYPE) { 274 return; 275 } 276 if (depth <= many) { 277 state = FieldInitState.START; 278 } 279 depth -= many; 280 int lastType = -1; 281 int lastComma = -1; 282 for (int i = 0; i < many; i++) { 283 if (ltIndices.isEmpty()) { 284 throw error(ErrorKind.UNEXPECTED_TOKEN, ">"); 285 } 286 lastType = ltIndices.removeLast(); 287 lastComma = commaIndices.removeLast(); 288 } 289 // The only known type argument locations that require look-ahead to classify are method 290 // references with parametric receivers, and qualified nested type names: 291 switch (token) { 292 case COLONCOLON: 293 case DOT: 294 this.tokens = tokens.subList(0, lastType); 295 this.commas = commas.subList(0, lastComma); 296 break; 297 default: 298 break; 299 } 300 } 301 302 /** 303 * Drops pairs of `<` `>` from the input. Should only be called in contexts where the braces are 304 * unambiguously type argument lists, not less-than. 305 * 306 * <p>Since the lexer munches multiple close braces as a single token, there's handling of right 307 * shifts for cases like the `>>` in `List<SavedToken<String, Integer>>`. 308 */ dropTypeArguments()309 private void dropTypeArguments() { 310 if (token != Token.LT) { 311 return; 312 } 313 next(); 314 int depth = 1; 315 while (depth > 0) { 316 switch (token) { 317 case LT: 318 depth++; 319 next(); 320 break; 321 case GTGTGT: 322 depth -= 3; 323 next(); 324 break; 325 case GTGT: 326 depth -= 2; 327 next(); 328 break; 329 case GT: 330 depth--; 331 next(); 332 break; 333 case EOF: 334 throw error(ErrorKind.UNEXPECTED_EOF); 335 default: 336 next(); 337 break; 338 } 339 } 340 } 341 342 @CheckReturnValue error(ErrorKind kind, Object... args)343 private TurbineError error(ErrorKind kind, Object... args) { 344 return TurbineError.format( 345 lexer.source(), 346 Math.min(lexer.position(), lexer.source().source().length() - 1), 347 kind, 348 args); 349 } 350 } 351