/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.turbine.parse;

import com.google.common.collect.ImmutableList;
import com.google.errorprone.annotations.CheckReturnValue;
import com.google.turbine.diag.TurbineError;
import com.google.turbine.diag.TurbineError.ErrorKind;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.List;

/**
 * Pre-process variable initializer expressions to handle multi-variable declarations.
 *
 * <p>Turbine needs to be able to parse compile-time constant expressions in constant variable
 * initializers and annotations. Parsing JLS 15.28 constant expressions is much easier than parsing
 * the full expression language, so we pre-process variable initializers to extract the expression
 * and then parse it with a simple constant expression parser that fails if it sees an expression
 * it doesn't understand.
 *
 * <p>To extract the (possibly constant) expression, we can usually just scan ahead to the
 * semi-colon at the end of the variable. To avoid matching on semi-colons inside lambdas or
 * anonymous class declarations, the preprocessor also matches braces.
 *
 * <p>That handles everything except multi-variable declarations (int x = 1, y = 2;), which in
 * hindsight were probably a mistake. Multi-variable declarations contain a list of name and
 * initializer pairs separated by commas. The initializer expressions may also contain commas, so
 * it's non-trivial to split on initializer boundaries. For example, consider `int x = a < b, c =
 * d;`. We can't tell looking at the prefix `a < b, c` whether that's a less-than expression
 * followed by another initializer, or the start of a generic type: `a<b, c>.foo()`. Distinguishing
 * between these cases requires arbitrary lookahead.
 *
 * <p>The preprocessor seems to be operationally correct. It's possible there are edge cases that it
 * doesn't handle, but it's extremely rare for compile-time constant multi-variable declarations to
 * contain complex generics. Multi-variable declarations are also disallowed by the Style guide.
 */
public class VariableInitializerParser {

  enum FieldInitState {
    /** The beginning of an initializer expression. */
    START,
    /** The state after `<identifier> <`. */
    TYPE,
  }

  /** Indices into {@code tokens} of the pending {@code LT} tokens, used for backtracking. */
  final ArrayDeque<Integer> ltIndices = new ArrayDeque<>();

  /** Indices into {@code commas} used for backtracking; pushed and popped with ltIndices. */
  final ArrayDeque<Integer> commaIndices = new ArrayDeque<>();

  /** The saved tokens. */
  List<SavedToken> tokens = new ArrayList<>();

  /**
   * Indices of boundaries between variable initializers in {@code tokens} (which are indicated by
   * commas in the input).
   */
  List<Integer> commas = new ArrayList<>();

  /** The current (not yet consumed) token. */
  public Token token;

  FieldInitState state = FieldInitState.START;

  /** Nesting depth of pending {@code <} tokens while in the {@code TYPE} state. */
  int depth = 0;

  final Lexer lexer;

  /**
   * Creates a parser.
   *
   * @param token the current token, i.e. the first token of the initializer
   * @param lexer the lexer that supplies all subsequent tokens
   */
  public VariableInitializerParser(Token token, Lexer lexer) {
    this.token = token;
    this.lexer = lexer;
  }

  /** Advances to the next token from the lexer. */
  private void next() {
    token = lexer.next();
  }

  /**
   * Returns lists of tokens for individual initializers in a (multi-)variable initializer.
   *
   * <p>Scanning stops at the first top-level semi-colon or at end of input. Each returned list is
   * terminated by a synthetic {@code EOF} token; the commas that separate initializers are not
   * included.
   */
  public List<List<SavedToken>> parseInitializers() {
    OUTER:
    while (true) {
      switch (token) {
        case IDENT:
          save();
          next();
          if (state == FieldInitState.START) {
            if (token == Token.LT) {
              // `<identifier> <` may be the start of a type argument list; remember where it
              // began so the tokens can be discarded if that turns out to be the case.
              state = FieldInitState.TYPE;
              depth = 1;
              ltIndices.clear();
              commaIndices.clear();
              ltIndices.addLast(tokens.size());
              commaIndices.addLast(commas.size());
              save();
              next();
              break;
            }
          }
          break;
        case LT:
          if (state == FieldInitState.TYPE) {
            depth++;
            ltIndices.addLast(tokens.size());
            commaIndices.addLast(commas.size());
          }
          save();
          next();
          break;
        case GTGTGT:
          save();
          next();
          dropBracks(3);
          break;
        case GTGT:
          save();
          next();
          dropBracks(2);
          break;
        case GT:
          save();
          next();
          dropBracks(1);
          break;
        case LPAREN:
          save();
          next();
          dropParens();
          break;
        case LBRACE:
          save();
          next();
          dropBraces();
          break;
        case SEMI:
          // Both currently defined states end the scan at a semi-colon.
          switch (state) {
            case START:
            case TYPE:
              break OUTER;
            default:
              break;
          }
          save();
          next();
          break;
        case COMMA:
          save();
          next();
          // Record the index of the token following the comma as an initializer boundary.
          switch (state) {
            case START:
            case TYPE:
              commas.add(tokens.size());
              break;
            default:
              break;
          }
          break;
        case DOT:
          save();
          next();
          dropTypeArguments();
          break;
        case NEW:
          save();
          next();
          dropTypeArguments();
          // Consume the (possibly qualified, possibly parameterized) type name; the separating
          // dots are skipped without being saved.
          while (token == Token.IDENT) {
            save();
            next();
            dropTypeArguments();
            if (token == Token.DOT) {
              next();
            } else {
              break;
            }
          }
          break;
        case COLONCOLON:
          save();
          next();
          dropTypeArguments();
          if (token == Token.NEW) {
            next();
          }
          break;
        case EOF:
          break OUTER;
        default:
          save();
          next();
          break;
      }
    }
    List<List<SavedToken>> result = new ArrayList<>();
    int start = 0;
    for (int idx : commas) {
      // idx is the token after a comma, so idx - 1 excludes the comma itself.
      result.add(
          ImmutableList.<SavedToken>builder()
              .addAll(tokens.subList(start, idx - 1))
              .add(new SavedToken(Token.EOF, null, -1))
              .build());
      start = idx;
    }
    result.add(
        ImmutableList.<SavedToken>builder()
            .addAll(tokens.subList(start, tokens.size()))
            .add(new SavedToken(Token.EOF, null, -1))
            .build());
    return result;
  }

  /**
   * Saves tokens up to and including the {@code )} that matches an already-consumed {@code (}.
   *
   * @throws TurbineError if the input ends before the parentheses are balanced
   */
  private void dropParens() {
    int depth = 1;
    while (depth > 0) {
      switch (token) {
        case LPAREN:
          save();
          next();
          depth++;
          break;
        case RPAREN:
          save();
          next();
          depth--;
          break;
        case EOF:
          throw error(ErrorKind.UNEXPECTED_EOF);
        default:
          save();
          next();
          break;
      }
    }
  }

  /**
   * Saves tokens up to and including the {@code }} that matches an already-consumed {@code {}.
   *
   * @throws TurbineError if the input ends before the braces are balanced
   */
  private void dropBraces() {
    int depth = 1;
    while (depth > 0) {
      switch (token) {
        case LBRACE:
          save();
          next();
          depth++;
          break;
        case RBRACE:
          save();
          next();
          depth--;
          break;
        case EOF:
          throw error(ErrorKind.UNEXPECTED_EOF);
        default:
          save();
          next();
          break;
      }
    }
  }

  /** Appends the current token, with the lexer's string value and position, to {@code tokens}. */
  private void save() {
    tokens.add(new SavedToken(token, lexer.stringValue(), lexer.position()));
  }

  /**
   * Handles {@code many} closing angle brackets that were just consumed while in the {@code TYPE}
   * state. If they are followed by {@code ::} or {@code .}, the bracketed region is classified as
   * a type argument list, and the saved tokens and comma boundaries recorded since the matching
   * {@code <} are discarded.
   *
   * @param many the number of {@code >} characters in the consumed token (1, 2 or 3)
   */
  private void dropBracks(int many) {
    if (state != FieldInitState.TYPE) {
      return;
    }
    if (depth <= many) {
      state = FieldInitState.START;
    }
    depth -= many;
    if (ltIndices.size() < many) {
      // More closers than recorded `<` tokens, e.g. `a < b >> c`: the `<` was a less-than
      // operator rather than a type argument list, so there is nothing to backtrack over.
      ltIndices.clear();
      commaIndices.clear();
      return;
    }
    int lastType = -1;
    int lastComma = -1;
    for (int i = 0; i < many; i++) {
      lastType = ltIndices.removeLast();
      lastComma = commaIndices.removeLast();
    }
    // The only known type argument locations that require look-ahead to classify are method
    // references with parametric receivers, and qualified nested type names:
    switch (token) {
      case COLONCOLON:
      case DOT:
        // Truncate in place rather than re-assigning the fields to sub-list views, so repeated
        // backtracking doesn't stack views or retain the discarded tokens.
        tokens.subList(lastType, tokens.size()).clear();
        commas.subList(lastComma, commas.size()).clear();
        break;
      default:
        break;
    }
  }

  /**
   * Drops pairs of `<` `>` from the input. Should only be called in contexts where the angle
   * brackets are unambiguously type argument lists, not less-than.
   *
   * <p>Since the lexer munches multiple closing angle brackets as a single token, there's handling
   * of right shifts for cases like the `>>` in `List<SavedToken<String, Integer>>`.
   *
   * @throws TurbineError if the input ends before the type argument list is closed
   */
  private void dropTypeArguments() {
    if (token != Token.LT) {
      return;
    }
    next();
    int depth = 1;
    while (depth > 0) {
      switch (token) {
        case LT:
          depth++;
          next();
          break;
        case GTGTGT:
          depth -= 3;
          next();
          break;
        case GTGT:
          depth -= 2;
          next();
          break;
        case GT:
          depth--;
          next();
          break;
        case EOF:
          // Fail on truncated input the same way dropParens and dropBraces do, instead of
          // looping without ever reducing the depth.
          throw error(ErrorKind.UNEXPECTED_EOF);
        default:
          next();
          break;
      }
    }
  }

  /**
   * Creates (but does not throw) a {@link TurbineError} located at the lexer's current position,
   * clamped to the index of the last character of the source.
   */
  @CheckReturnValue
  private TurbineError error(ErrorKind kind, Object... args) {
    return TurbineError.format(
        lexer.source(),
        Math.min(lexer.position(), lexer.source().source().length() - 1),
        kind,
        args);
  }
}