• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2016 Google Inc. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.turbine.parse;
18 
19 import com.google.common.collect.ImmutableList;
20 import com.google.errorprone.annotations.CheckReturnValue;
21 import com.google.turbine.diag.TurbineError;
22 import com.google.turbine.diag.TurbineError.ErrorKind;
23 import java.util.ArrayDeque;
24 import java.util.ArrayList;
25 import java.util.List;
26 
27 /**
28  * Pre-process variable initializer expressions to handle multi-variable declarations.
29  *
30  * <p>Turbine needs to be able to parse compile-time constant expressions in constant variable
31  * intializers and annotations. Parsing JLS 15.28 constant expressions is much easier than parsing
32  * the full expression language, so we pre-process variable initializers to extract the expression
33  * and then parse it with an simple constant expression parser that fails if it sees an expression
34  * it doesn't understand.
35  *
36  * <p>To extract the (possibly constant) expression, we can usually just scan ahead to the
37  * semi-colon at the end of the variable. To avoid matching on semi-colons inside lambdas or
38  * anonymous class declarations, the preprocessor also matches braces.
39  *
40  * <p>That handles everything except multi-variable declarations (int x = 1, y = 2;), which in
41  * hindsight were probably a mistake. Multi-variable declarations contain a list of name and
42  * initializer pairs separated by commas. The initializer expressions may also contain commas, so
43  * it's non-trivial to split on initializer boundaries. For example, consider {@code int x = a < b,
44  * c = d;}. We can't tell looking at the prefix {@code a < b, c} whether that's a less-than
45  * expression followed by another initializer, or the start of a generic type: {@code a<b, c>.foo(}.
46  * Distinguishing between these cases requires arbitrary lookahead.
47  *
48  * <p>The preprocessor seems to be operationally correct. It's possible there are edge cases that it
49  * doesn't handle, but it's extremely rare for compile-time constant multi-variable declarations to
50  * contain complex generics. Multi-variable declarations are also disallowed by the Style guide.
51  */
52 public class VariableInitializerParser {
53 
54   enum FieldInitState {
55     /** The beginning of an initializer expression. */
56     START,
57     /** The state after `<identifier> <`. */
58     TYPE,
59   }
60 
61   /** Indices into {@code LT} tokens used for backtracking. */
62   final ArrayDeque<Integer> ltIndices = new ArrayDeque<>();
63 
64   /** Indices into {@code commas} used for backtracking. */
65   final ArrayDeque<Integer> commaIndices = new ArrayDeque<>();
66 
67   /** The saved tokens. */
68   List<SavedToken> tokens = new ArrayList<>();
69 
70   /**
71    * Indices of boundaries between variable initializers in {@code tokens} (which are indicated by
72    * commas in the input).
73    */
74   List<Integer> commas = new ArrayList<>();
75 
76   public Token token;
77   FieldInitState state = FieldInitState.START;
78   int depth = 0;
79 
80   final Lexer lexer;
81 
VariableInitializerParser(Token token, Lexer lexer)82   public VariableInitializerParser(Token token, Lexer lexer) {
83     this.token = token;
84     this.lexer = lexer;
85   }
86 
next()87   private void next() {
88     token = lexer.next();
89   }
90 
91   /** Returns lists of tokens for individual initializers in a (mutli-)variable initializer. */
parseInitializers()92   public List<List<SavedToken>> parseInitializers() {
93     OUTER:
94     while (true) {
95       switch (token) {
96         case IDENT:
97           save();
98           next();
99           if (state == FieldInitState.START) {
100             if (token == Token.LT) {
101               state = FieldInitState.TYPE;
102               depth = 1;
103               ltIndices.clear();
104               commaIndices.clear();
105               ltIndices.addLast(tokens.size());
106               commaIndices.addLast(commas.size());
107               save();
108               next();
109               break;
110             }
111           }
112           break;
113         case LT:
114           if (state == FieldInitState.TYPE) {
115             depth++;
116             ltIndices.addLast(tokens.size());
117             commaIndices.addLast(commas.size());
118           }
119           save();
120           next();
121           break;
122         case GTGTGT:
123           save();
124           next();
125           dropBracks(3);
126           break;
127         case GTGT:
128           save();
129           next();
130           dropBracks(2);
131           break;
132         case GT:
133           save();
134           next();
135           dropBracks(1);
136           break;
137         case LPAREN:
138           save();
139           next();
140           dropParens();
141           break;
142         case LBRACE:
143           save();
144           next();
145           dropBraces();
146           break;
147         case SEMI:
148           switch (state) {
149             case START:
150             case TYPE:
151               break OUTER;
152           }
153           save();
154           next();
155           break;
156         case COMMA:
157           save();
158           next();
159           switch (state) {
160             case START:
161             case TYPE:
162               commas.add(tokens.size());
163               break;
164           }
165           break;
166         case DOT:
167           save();
168           next();
169           dropTypeArguments();
170           break;
171         case NEW:
172           save();
173           next();
174           dropTypeArguments();
175           while (token == Token.IDENT) {
176             save();
177             next();
178             dropTypeArguments();
179             if (token == Token.DOT) {
180               next();
181             } else {
182               break;
183             }
184           }
185           break;
186         case COLONCOLON:
187           save();
188           next();
189           dropTypeArguments();
190           if (token == Token.NEW) {
191             next();
192           }
193           break;
194         case EOF:
195           break OUTER;
196         default:
197           save();
198           next();
199           break;
200       }
201     }
202     List<List<SavedToken>> result = new ArrayList<>();
203     int start = 0;
204     for (int idx : commas) {
205       result.add(
206           ImmutableList.<SavedToken>builder()
207               .addAll(tokens.subList(start, idx - 1))
208               .add(new SavedToken(Token.EOF, null, tokens.get(idx - 1).position))
209               .build());
210       start = idx;
211     }
212     result.add(
213         ImmutableList.<SavedToken>builder()
214             .addAll(tokens.subList(start, tokens.size()))
215             .add(new SavedToken(Token.EOF, null, lexer.position()))
216             .build());
217     return result;
218   }
219 
dropParens()220   private void dropParens() {
221     int depth = 1;
222     while (depth > 0) {
223       switch (token) {
224         case LPAREN:
225           save();
226           next();
227           depth++;
228           break;
229         case RPAREN:
230           save();
231           next();
232           depth--;
233           break;
234         case EOF:
235           throw error(ErrorKind.UNEXPECTED_EOF);
236         default:
237           save();
238           next();
239           break;
240       }
241     }
242   }
243 
dropBraces()244   private void dropBraces() {
245     int depth = 1;
246     while (depth > 0) {
247       switch (token) {
248         case LBRACE:
249           save();
250           next();
251           depth++;
252           break;
253         case RBRACE:
254           save();
255           next();
256           depth--;
257           break;
258         case EOF:
259           throw error(ErrorKind.UNEXPECTED_EOF);
260         default:
261           save();
262           next();
263           break;
264       }
265     }
266   }
267 
save()268   private void save() {
269     tokens.add(new SavedToken(token, lexer.stringValue(), lexer.position()));
270   }
271 
dropBracks(int many)272   private void dropBracks(int many) {
273     if (state != FieldInitState.TYPE) {
274       return;
275     }
276     if (depth <= many) {
277       state = FieldInitState.START;
278     }
279     depth -= many;
280     int lastType = -1;
281     int lastComma = -1;
282     for (int i = 0; i < many; i++) {
283       if (ltIndices.isEmpty()) {
284         throw error(ErrorKind.UNEXPECTED_TOKEN, ">");
285       }
286       lastType = ltIndices.removeLast();
287       lastComma = commaIndices.removeLast();
288     }
289     // The only known type argument locations that require look-ahead to classify are method
290     // references with parametric receivers, and qualified nested type names:
291     switch (token) {
292       case COLONCOLON:
293       case DOT:
294         this.tokens = tokens.subList(0, lastType);
295         this.commas = commas.subList(0, lastComma);
296         break;
297       default:
298         break;
299     }
300   }
301 
302   /**
303    * Drops pairs of `<` `>` from the input. Should only be called in contexts where the braces are
304    * unambiguously type argument lists, not less-than.
305    *
306    * <p>Since the lexer munches multiple close braces as a single token, there's handling of right
307    * shifts for cases like the `>>` in `List<SavedToken<String, Integer>>`.
308    */
dropTypeArguments()309   private void dropTypeArguments() {
310     if (token != Token.LT) {
311       return;
312     }
313     next();
314     int depth = 1;
315     while (depth > 0) {
316       switch (token) {
317         case LT:
318           depth++;
319           next();
320           break;
321         case GTGTGT:
322           depth -= 3;
323           next();
324           break;
325         case GTGT:
326           depth -= 2;
327           next();
328           break;
329         case GT:
330           depth--;
331           next();
332           break;
333         case EOF:
334           throw error(ErrorKind.UNEXPECTED_EOF);
335         default:
336           next();
337           break;
338       }
339     }
340   }
341 
342   @CheckReturnValue
error(ErrorKind kind, Object... args)343   private TurbineError error(ErrorKind kind, Object... args) {
344     return TurbineError.format(
345         lexer.source(),
346         Math.min(lexer.position(), lexer.source().source().length() - 1),
347         kind,
348         args);
349   }
350 }
351