• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.turbine.parse;
18 
19 import com.google.common.collect.ImmutableList;
20 import com.google.errorprone.annotations.CheckReturnValue;
21 import com.google.turbine.diag.TurbineError;
22 import com.google.turbine.diag.TurbineError.ErrorKind;
23 import java.util.ArrayDeque;
24 import java.util.ArrayList;
25 import java.util.List;
26 
27 /**
28  * Pre-process variable initializer expressions to handle multi-variable declarations.
29  *
30  * <p>Turbine needs to be able to parse compile-time constant expressions in constant variable
31  * intializers and annotations. Parsing JLS 15.28 constant expressions is much easier than parsing
32  * the full expression language, so we pre-process variable initializers to extract the expression
33  * and then parse it with an simple constant expression parser that fails if it sees an expression
34  * it doesn't understand.
35  *
36  * <p>To extract the (possibly constant) expression, we can usually just scan ahead to the
37  * semi-colon at the end of the variable. To avoid matching on semi-colons inside lambdas or
38  * anonymous class declarations, the preprocessor also matches braces.
39  *
40  * <p>That handles everything except multi-variable declarations (int x = 1, y = 2;), which in
41  * hindsight were probably a mistake. Multi-variable declarations contain a list of name and
42  * initializer pairs separated by commas. The initializer expressions may also contain commas, so
43  * it's non-trivial to split on initializer boundaries. For example, consider `int x = a < b, c =
44  * d;`. We can't tell looking at the prefix `a < b, c` whether that's a less-than expression
45  * followed by another initializer, or the start of a generic type: `a<b, c>.foo()`. Distinguishing
46  * between these cases requires arbitrary lookahead.
47  *
48  * <p>The preprocessor seems to be operationally correct. It's possible there are edge cases that it
49  * doesn't handle, but it's extremely rare for compile-time constant multi-variable declarations to
50  * contain complex generics. Multi-variable declarations are also disallowed by the Style guide.
51  */
52 public class VariableInitializerParser {
53 
54   enum FieldInitState {
55     /** The beginning of an initializer expression. */
56     START,
57     /** The state after `<identifier> <`. */
58     TYPE,
59   }
60 
61   /** Indices into {@code LT} tokens used for backtracking. */
62   final ArrayDeque<Integer> ltIndices = new ArrayDeque<>();
63 
64   /** Indices into {@code commas} used for backtracking. */
65   final ArrayDeque<Integer> commaIndices = new ArrayDeque<>();
66 
67   /** The saved tokens. */
68   List<SavedToken> tokens = new ArrayList<>();
69 
70   /**
71    * Indices of boundaries between variable initializers in {@code tokens} (which are indicated by
72    * commas in the input).
73    */
74   List<Integer> commas = new ArrayList<>();
75 
76   public Token token;
77   FieldInitState state = FieldInitState.START;
78   int depth = 0;
79 
80   final Lexer lexer;
81 
VariableInitializerParser(Token token, Lexer lexer)82   public VariableInitializerParser(Token token, Lexer lexer) {
83     this.token = token;
84     this.lexer = lexer;
85   }
86 
next()87   private void next() {
88     token = lexer.next();
89   }
90 
91   /** Returns lists of tokens for individual initializers in a (mutli-)variable initializer. */
parseInitializers()92   public List<List<SavedToken>> parseInitializers() {
93     OUTER:
94     while (true) {
95       switch (token) {
96         case IDENT:
97           save();
98           next();
99           if (state == FieldInitState.START) {
100             if (token == Token.LT) {
101               state = FieldInitState.TYPE;
102               depth = 1;
103               ltIndices.clear();
104               commaIndices.clear();
105               ltIndices.addLast(tokens.size());
106               commaIndices.addLast(commas.size());
107               save();
108               next();
109               break;
110             }
111           }
112           break;
113         case LT:
114           if (state == FieldInitState.TYPE) {
115             depth++;
116             ltIndices.addLast(tokens.size());
117             commaIndices.addLast(commas.size());
118           }
119           save();
120           next();
121           break;
122         case GTGTGT:
123           save();
124           next();
125           dropBracks(3);
126           break;
127         case GTGT:
128           save();
129           next();
130           dropBracks(2);
131           break;
132         case GT:
133           save();
134           next();
135           dropBracks(1);
136           break;
137         case LPAREN:
138           save();
139           next();
140           dropParens();
141           break;
142         case LBRACE:
143           save();
144           next();
145           dropBraces();
146           break;
147         case SEMI:
148           switch (state) {
149             case START:
150             case TYPE:
151               break OUTER;
152             default:
153               break;
154           }
155           save();
156           next();
157           break;
158         case COMMA:
159           save();
160           next();
161           switch (state) {
162             case START:
163             case TYPE:
164               commas.add(tokens.size());
165               break;
166             default:
167               break;
168           }
169           break;
170         case DOT:
171           save();
172           next();
173           dropTypeArguments();
174           break;
175         case NEW:
176           save();
177           next();
178           dropTypeArguments();
179           while (token == Token.IDENT) {
180             save();
181             next();
182             dropTypeArguments();
183             if (token == Token.DOT) {
184               next();
185             } else {
186               break;
187             }
188           }
189           break;
190         case COLONCOLON:
191           save();
192           next();
193           dropTypeArguments();
194           if (token == Token.NEW) {
195             next();
196           }
197           break;
198         case EOF:
199           break OUTER;
200         default:
201           save();
202           next();
203           break;
204       }
205     }
206     List<List<SavedToken>> result = new ArrayList<>();
207     int start = 0;
208     for (int idx : commas) {
209       result.add(
210           ImmutableList.<SavedToken>builder()
211               .addAll(tokens.subList(start, idx - 1))
212               .add(new SavedToken(Token.EOF, null, -1))
213               .build());
214       start = idx;
215     }
216     result.add(
217         ImmutableList.<SavedToken>builder()
218             .addAll(tokens.subList(start, tokens.size()))
219             .add(new SavedToken(Token.EOF, null, -1))
220             .build());
221     return result;
222   }
223 
dropParens()224   private void dropParens() {
225     int depth = 1;
226     while (depth > 0) {
227       switch (token) {
228         case LPAREN:
229           save();
230           next();
231           depth++;
232           break;
233         case RPAREN:
234           save();
235           next();
236           depth--;
237           break;
238         case EOF:
239           throw error(ErrorKind.UNEXPECTED_EOF);
240         default:
241           save();
242           next();
243           break;
244       }
245     }
246   }
247 
dropBraces()248   private void dropBraces() {
249     int depth = 1;
250     while (depth > 0) {
251       switch (token) {
252         case LBRACE:
253           save();
254           next();
255           depth++;
256           break;
257         case RBRACE:
258           save();
259           next();
260           depth--;
261           break;
262         case EOF:
263           throw error(ErrorKind.UNEXPECTED_EOF);
264         default:
265           save();
266           next();
267           break;
268       }
269     }
270   }
271 
save()272   private void save() {
273     tokens.add(new SavedToken(token, lexer.stringValue(), lexer.position()));
274   }
275 
dropBracks(int many)276   private void dropBracks(int many) {
277     if (state != FieldInitState.TYPE) {
278       return;
279     }
280     if (depth <= many) {
281       state = FieldInitState.START;
282     }
283     depth -= many;
284     int lastType = -1;
285     int lastComma = -1;
286     for (int i = 0; i < many; i++) {
287       lastType = ltIndices.removeLast();
288       lastComma = commaIndices.removeLast();
289     }
290     // The only known type argument locations that require look-ahead to classify are method
291     // references with parametric receivers, and qualified nested type names:
292     switch (token) {
293       case COLONCOLON:
294       case DOT:
295         this.tokens = tokens.subList(0, lastType);
296         this.commas = commas.subList(0, lastComma);
297         break;
298       default:
299         break;
300     }
301   }
302 
303   /**
304    * Drops pairs of `<` `>` from the input. Should only be called in contexts where the braces are
305    * unambiguously type argument lists, not less-than.
306    *
307    * <p>Since the lexer munches multiple close braces as a single token, there's handling of right
308    * shifts for cases like the `>>` in `List<SavedToken<String, Integer>>`.
309    */
dropTypeArguments()310   private void dropTypeArguments() {
311     if (token != Token.LT) {
312       return;
313     }
314     next();
315     int depth = 1;
316     while (depth > 0) {
317       switch (token) {
318         case LT:
319           depth++;
320           next();
321           break;
322         case GTGTGT:
323           depth -= 3;
324           next();
325           break;
326         case GTGT:
327           depth -= 2;
328           next();
329           break;
330         case GT:
331           depth--;
332           next();
333           break;
334         default:
335           next();
336           break;
337       }
338     }
339   }
340 
341   @CheckReturnValue
error(ErrorKind kind, Object... args)342   private TurbineError error(ErrorKind kind, Object... args) {
343     return TurbineError.format(
344         lexer.source(),
345         Math.min(lexer.position(), lexer.source().source().length() - 1),
346         kind,
347         args);
348   }
349 }
350