• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "gn/parser.h"
6 
7 #include <memory>
8 #include <utility>
9 
10 #include "base/logging.h"
11 #include "gn/functions.h"
12 #include "gn/operators.h"
13 #include "gn/token.h"
14 
15 const char kGrammar_Help[] =
16     R"*(Language and grammar for GN build files
17 
18 Tokens
19 
20   GN build files are read as sequences of tokens.  While splitting the file
21   into tokens, the next token is the longest sequence of characters that form a
22   valid token.
23 
24 White space and comments
25 
26   White space is comprised of spaces (U+0020), horizontal tabs (U+0009),
27   carriage returns (U+000D), and newlines (U+000A).
28 
29   Comments start at the character "#" and stop at the next newline.
30 
31   White space and comments are ignored except that they may separate tokens
32   that would otherwise combine into a single token.
33 
34 Identifiers
35 
36   Identifiers name variables and functions.
37 
38       identifier = letter { letter | digit } .
39       letter     = "A" ... "Z" | "a" ... "z" | "_" .
40       digit      = "0" ... "9" .
41 
42 Keywords
43 
44   The following keywords are reserved and may not be used as identifiers:
45 
46           else    false   if      true
47 
48 Integer literals
49 
50   An integer literal represents a decimal integer value.
51 
52       integer = [ "-" ] digit { digit } .
53 
54   Leading zeros and negative zero are disallowed.
55 
56 String literals
57 
58   A string literal represents a string value consisting of the quoted
59   characters with possible escape sequences and variable expansions.
60 
61       string           = `"` { char | escape | expansion } `"` .
62       escape           = `\` ( "$" | `"` | char ) .
63       BracketExpansion = "{" ( identifier | ArrayAccess | ScopeAccess ) "}" .
64       Hex              = "0x" [0-9A-Fa-f][0-9A-Fa-f]
65       expansion        = "$" ( identifier | BracketExpansion | Hex ) .
66       char             = /* any character except "$", `"`, or newline */ .
67 
68   After a backslash, certain sequences represent special characters:
69 
70           \"    U+0022    quotation mark
71           \$    U+0024    dollar sign
72           \\    U+005C    backslash
73 
74   All other backslashes represent themselves.
75 
76   To insert an arbitrary byte value, use $0xFF. For example, to insert a
77   newline character: "Line one$0x0ALine two".
78 
79   An expansion will evaluate the variable following the '$' and insert a
80   stringified version of it into the result. For example, to concat two path
81   components with a slash separating them:
82     "$var_one/$var_two"
83   Use the "${var_one}" format to be explicitly deliniate the variable for
84   otherwise-ambiguous cases.
85 
86 Punctuation
87 
88   The following character sequences represent punctuation:
89 
90           +       +=      ==      !=      (       )
91           -       -=      <       <=      [       ]
92           !       =       >       >=      {       }
93                           &&      ||      .       ,
94 
95 Grammar
96 
97   The input tokens form a syntax tree following a context-free grammar:
98 
99       File = StatementList .
100 
101       Statement     = Assignment | Call | Condition .
102       LValue        = identifier | ArrayAccess | ScopeAccess .
103       Assignment    = LValue AssignOp Expr .
104       Call          = identifier "(" [ ExprList ] ")" [ Block ] .
105       Condition     = "if" "(" Expr ")" Block
106                       [ "else" ( Condition | Block ) ] .
107       Block         = "{" StatementList "}" .
108       StatementList = { Statement } .
109 
110       ArrayAccess = identifier "[" Expr "]" .
111       ScopeAccess = identifier "." identifier .
112       Expr        = UnaryExpr | Expr BinaryOp Expr .
113       UnaryExpr   = PrimaryExpr | UnaryOp UnaryExpr .
114       PrimaryExpr = identifier | integer | string | Call
115                   | ArrayAccess | ScopeAccess | Block
116                   | "(" Expr ")"
117                   | "[" [ ExprList [ "," ] ] "]" .
118       ExprList    = Expr { "," Expr } .
119 
120       AssignOp = "=" | "+=" | "-=" .
121       UnaryOp  = "!" .
122       BinaryOp = "+" | "-"                  // highest priority
123                | "<" | "<=" | ">" | ">="
124                | "==" | "!="
125                | "&&"
126                | "||" .                     // lowest priority
127 
128   All binary operators are left-associative.
129 
130 Types
131 
132   The GN language is dynamically typed. The following types are used:
133 
134    - Boolean: Uses the keywords "true" and "false". There is no implicit
135      conversion between booleans and integers.
136 
137    - Integers: All numbers in GN are signed 64-bit integers.
138 
139    - Strings: Strings are 8-bit with no enforced encoding. When a string is
140      used to interact with other systems with particular encodings (like the
141      Windows and Mac filesystems) it is assumed to be UTF-8. See "String
142      literals" above for more.
143 
144    - Lists: Lists are arbitrary-length ordered lists of values. See "Lists"
145      below for more.
146 
147    - Scopes: Scopes are like dictionaries that use variable names for keys. See
148      "Scopes" below for more.
149 
150 Lists
151 
152   Lists are created with [] and using commas to separate items:
153 
154        mylist = [ 0, 1, 2, "some string" ]
155 
156   A comma after the last item is optional. Lists are dereferenced using 0-based
157   indexing:
158 
159        mylist[0] += 1
160        var = mylist[2]
161 
162   Lists can be concatenated using the '+' and '+=' operators. Bare values can
163   not be concatenated with lists, to add a single item, it must be put into a
164   list of length one.
165 
166   Items can be removed from lists using the '-' and '-=' operators. This will
167   remove all occurrences of every item in the right-hand list from the
168   left-hand list. It is an error to remove an item not in the list. This is to
169   prevent common typos and to detect dead code that is removing things that no
170   longer apply.
171 
172   It is an error to use '=' to replace a nonempty list with another nonempty
173   list. This is to prevent accidentally overwriting data when in most cases
174   '+=' was intended. To overwrite a list on purpose, first assign it to the
175   empty list:
176 
177     mylist = []
178     mylist = otherlist
179 
180 Scopes
181 
182   All execution happens in the context of a scope which holds the current state
183   (like variables). With the exception of loops and conditions, '{' introduces
184   a new scope.
185 
186   Most scopes have a parent reference to the old scope. Variable reads
187   recursively search all parent scopes until the variable is found or there are
188   no more scopes. Variable writes always go into the current scope. This means
189   that after the closing '}' (again excepting loops and conditions), all local
190   variables will be restored to the previous values.  This also means that "foo
191   = foo" can do useful work by copying a variable into the current scope that
192   was defined in a containing scope.
193 
194   Scopes can be assigned to variables. Examples of such scopes are the
195   implicitly-created "invoker" when invoking a template (which refers to the
196   variables set by the invoking code), scopes created by functions like
197   exec_script, and scopes explicitly created like
198 
199     empty_scope = {}
200     myvalues = {
201       foo = 21
202       bar = "something"
203     }
204 
205   In the case of explicitly created scopes and scopes created by functions like
206   exec_script, there is no reference to the parent scope. Such scopes are fully
207   self-contained and do not "inherit" values from their defining scope.
208 
209   Inside an explicit scope definition can be any GN code including conditionals
210   and function calls. After the close of the scope, it will contain all
211   variables explicitly set by the code contained inside it. After this, the
212   values can be read, modified, or added to:
213 
214     myvalues.foo += 2
215     empty_scope.new_thing = [ 1, 2, 3 ]
216 
217   Scope equality is defined as single-level scopes identical within the current
218   scope. That is, all values in the first scope must be present and identical
219   within the second, and vice versa. Note that this means inherited scopes are
220   always unequal by definition.
221 )*";
222 
223 // Precedence constants.
224 //
225 // Currently all operators are left-associative so this list is sequential. To
226 // implement a right-assocative operators in a Pratt parser we would leave gaps
227 // in between the constants, and right-associative operators get a precedence
228 // of "<left-associated-precedence> - 1".
229 enum Precedence {
230   PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
231   PRECEDENCE_OR = 2,
232   PRECEDENCE_AND = 3,
233   PRECEDENCE_EQUALITY = 4,
234   PRECEDENCE_RELATION = 5,
235   PRECEDENCE_SUM = 6,
236   PRECEDENCE_PREFIX = 7,
237   PRECEDENCE_CALL = 8,
238   PRECEDENCE_DOT = 9,  // Highest precedence.
239 };
240 
241 // The top-level for blocks/ifs is recursive descent, the expression parser is
242 // a Pratt parser. The basic idea there is to have the precedences (and
243 // associativities) encoded relative to each other and only parse up until you
244 // hit something of that precedence. There's a dispatch table in expressions_
245 // at the top of parser.cc that describes how each token dispatches if it's
246 // seen as either a prefix or infix operator, and if it's infix, what its
247 // precedence is.
248 //
249 // References:
250 // http://javascript.crockford.com/tdop/tdop.html
251 // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
252 
253 // Indexed by Token::Type.
254 ParserHelper Parser::expressions_[] = {
255     {nullptr, nullptr, -1},                                   // INVALID
256     {&Parser::Literal, nullptr, -1},                          // INTEGER
257     {&Parser::Literal, nullptr, -1},                          // STRING
258     {&Parser::Literal, nullptr, -1},                          // TRUE_TOKEN
259     {&Parser::Literal, nullptr, -1},                          // FALSE_TOKEN
260     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // EQUAL
261     {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // PLUS
262     {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // MINUS
263     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // PLUS_EQUALS
264     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // MINUS_EQUALS
265     {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
266     {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
267     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
268     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
269     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
270     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
271     {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},       // BOOLEAN_AND
272     {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},        // BOOLEAN_OR
273     {&Parser::Not, nullptr, -1},                              // BANG
274     {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},          // DOT
275     {&Parser::Group, nullptr, -1},                            // LEFT_PAREN
276     {nullptr, nullptr, -1},                                   // RIGHT_PAREN
277     {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},     // LEFT_BRACKET
278     {nullptr, nullptr, -1},                                   // RIGHT_BRACKET
279     {&Parser::Block, nullptr, -1},                            // LEFT_BRACE
280     {nullptr, nullptr, -1},                                   // RIGHT_BRACE
281     {nullptr, nullptr, -1},                                   // IF
282     {nullptr, nullptr, -1},                                   // ELSE
283     {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
284     {nullptr, nullptr, -1},                                       // COMMA
285     {nullptr, nullptr, -1},                // UNCLASSIFIED_COMMENT
286     {nullptr, nullptr, -1},                // LINE_COMMENT
287     {nullptr, nullptr, -1},                // SUFFIX_COMMENT
288     {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
289 };
290 
Parser(const std::vector<Token> & tokens,Err * err)291 Parser::Parser(const std::vector<Token>& tokens, Err* err)
292     : invalid_token_(Location(), Token::INVALID, std::string_view()),
293       err_(err),
294       cur_(0) {
295   for (const auto& token : tokens) {
296     switch (token.type()) {
297       case Token::LINE_COMMENT:
298         line_comment_tokens_.push_back(token);
299         break;
300       case Token::SUFFIX_COMMENT:
301         suffix_comment_tokens_.push_back(token);
302         break;
303       default:
304         // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
305         // through the real parser.
306         tokens_.push_back(token);
307         break;
308     }
309   }
310 }
311 
312 Parser::~Parser() = default;
313 
314 // static
Parse(const std::vector<Token> & tokens,Err * err)315 std::unique_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
316                                          Err* err) {
317   Parser p(tokens, err);
318   return p.ParseFile();
319 }
320 
321 // static
ParseExpression(const std::vector<Token> & tokens,Err * err)322 std::unique_ptr<ParseNode> Parser::ParseExpression(
323     const std::vector<Token>& tokens,
324     Err* err) {
325   Parser p(tokens, err);
326   std::unique_ptr<ParseNode> expr = p.ParseExpression();
327   if (!p.at_end() && !err->has_error()) {
328     *err = Err(p.cur_token(), "Trailing garbage");
329     return nullptr;
330   }
331   return expr;
332 }
333 
334 // static
ParseValue(const std::vector<Token> & tokens,Err * err)335 std::unique_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
336                                               Err* err) {
337   for (const Token& token : tokens) {
338     switch (token.type()) {
339       case Token::INTEGER:
340       case Token::STRING:
341       case Token::TRUE_TOKEN:
342       case Token::FALSE_TOKEN:
343       case Token::LEFT_BRACKET:
344       case Token::RIGHT_BRACKET:
345       case Token::COMMA:
346         continue;
347       default:
348         *err = Err(token, "Invalid token in literal value");
349         return nullptr;
350     }
351   }
352 
353   return ParseExpression(tokens, err);
354 }
355 
IsAssignment(const ParseNode * node) const356 bool Parser::IsAssignment(const ParseNode* node) const {
357   return node && node->AsBinaryOp() &&
358          (node->AsBinaryOp()->op().type() == Token::EQUAL ||
359           node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
360           node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
361 }
362 
IsStatementBreak(Token::Type token_type) const363 bool Parser::IsStatementBreak(Token::Type token_type) const {
364   switch (token_type) {
365     case Token::IDENTIFIER:
366     case Token::LEFT_BRACE:
367     case Token::RIGHT_BRACE:
368     case Token::IF:
369     case Token::ELSE:
370       return true;
371     default:
372       return false;
373   }
374 }
375 
LookAhead(Token::Type type)376 bool Parser::LookAhead(Token::Type type) {
377   if (at_end())
378     return false;
379   return cur_token().type() == type;
380 }
381 
Match(Token::Type type)382 bool Parser::Match(Token::Type type) {
383   if (!LookAhead(type))
384     return false;
385   Consume();
386   return true;
387 }
388 
Consume(Token::Type type,const char * error_message)389 const Token& Parser::Consume(Token::Type type, const char* error_message) {
390   Token::Type types[1] = {type};
391   return Consume(types, 1, error_message);
392 }
393 
Consume(Token::Type * types,size_t num_types,const char * error_message)394 const Token& Parser::Consume(Token::Type* types,
395                              size_t num_types,
396                              const char* error_message) {
397   if (has_error()) {
398     // Don't overwrite current error, but make progress through tokens so that
399     // a loop that's expecting a particular token will still terminate.
400     if (!at_end())
401       cur_++;
402     return invalid_token_;
403   }
404   if (at_end()) {
405     const char kEOFMsg[] = "I hit EOF instead.";
406     if (tokens_.empty())
407       *err_ = Err(Location(), error_message, kEOFMsg);
408     else
409       *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
410     return invalid_token_;
411   }
412 
413   for (size_t i = 0; i < num_types; ++i) {
414     if (cur_token().type() == types[i])
415       return Consume();
416   }
417   *err_ = Err(cur_token(), error_message);
418   return invalid_token_;
419 }
420 
Consume()421 const Token& Parser::Consume() {
422   return tokens_[cur_++];
423 }
424 
ParseExpression()425 std::unique_ptr<ParseNode> Parser::ParseExpression() {
426   return ParseExpression(0);
427 }
428 
ParseExpression(int precedence)429 std::unique_ptr<ParseNode> Parser::ParseExpression(int precedence) {
430   if (at_end())
431     return std::unique_ptr<ParseNode>();
432 
433   const Token& token = Consume();
434   PrefixFunc prefix = expressions_[token.type()].prefix;
435 
436   if (prefix == nullptr) {
437     *err_ = Err(token, "Unexpected token '" + std::string(token.value()) + "'");
438     return std::unique_ptr<ParseNode>();
439   }
440 
441   std::unique_ptr<ParseNode> left = (this->*prefix)(token);
442   if (has_error())
443     return left;
444 
445   while (!at_end() && !IsStatementBreak(cur_token().type()) &&
446          precedence <= expressions_[cur_token().type()].precedence) {
447     const Token& next_token = Consume();
448     InfixFunc infix = expressions_[next_token.type()].infix;
449     if (infix == nullptr) {
450       *err_ = Err(next_token,
451                   "Unexpected token '" + std::string(next_token.value()) + "'");
452       return std::unique_ptr<ParseNode>();
453     }
454     left = (this->*infix)(std::move(left), next_token);
455     if (has_error())
456       return std::unique_ptr<ParseNode>();
457   }
458 
459   return left;
460 }
461 
Block(const Token & token)462 std::unique_ptr<ParseNode> Parser::Block(const Token& token) {
463   // This entrypoint into ParseBlock means it's part of an expression and we
464   // always want the result.
465   return ParseBlock(token, BlockNode::RETURNS_SCOPE);
466 }
467 
Literal(const Token & token)468 std::unique_ptr<ParseNode> Parser::Literal(const Token& token) {
469   return std::make_unique<LiteralNode>(token);
470 }
471 
Name(const Token & token)472 std::unique_ptr<ParseNode> Parser::Name(const Token& token) {
473   return IdentifierOrCall(std::unique_ptr<ParseNode>(), token);
474 }
475 
BlockComment(const Token & token)476 std::unique_ptr<ParseNode> Parser::BlockComment(const Token& token) {
477   std::unique_ptr<BlockCommentNode> comment =
478       std::make_unique<BlockCommentNode>();
479   comment->set_comment(token);
480   return comment;
481 }
482 
Group(const Token & token)483 std::unique_ptr<ParseNode> Parser::Group(const Token& token) {
484   std::unique_ptr<ParseNode> expr = ParseExpression();
485   if (has_error())
486     return std::unique_ptr<ParseNode>();
487   Consume(Token::RIGHT_PAREN, "Expected ')'");
488   return expr;
489 }
490 
Not(const Token & token)491 std::unique_ptr<ParseNode> Parser::Not(const Token& token) {
492   std::unique_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
493   if (has_error())
494     return std::unique_ptr<ParseNode>();
495   if (!expr) {
496     if (!has_error())
497       *err_ = Err(token, "Expected right-hand side for '!'.");
498     return std::unique_ptr<ParseNode>();
499   }
500   std::unique_ptr<UnaryOpNode> unary_op = std::make_unique<UnaryOpNode>();
501   unary_op->set_op(token);
502   unary_op->set_operand(std::move(expr));
503   return unary_op;
504 }
505 
List(const Token & node)506 std::unique_ptr<ParseNode> Parser::List(const Token& node) {
507   std::unique_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
508   if (!has_error() && !at_end())
509     Consume(Token::RIGHT_BRACKET, "Expected ']'");
510   return list;
511 }
512 
BinaryOperator(std::unique_ptr<ParseNode> left,const Token & token)513 std::unique_ptr<ParseNode> Parser::BinaryOperator(
514     std::unique_ptr<ParseNode> left,
515     const Token& token) {
516   std::unique_ptr<ParseNode> right =
517       ParseExpression(expressions_[token.type()].precedence + 1);
518   if (!right) {
519     if (!has_error()) {
520       *err_ = Err(token, "Expected right-hand side for '" +
521                              std::string(token.value()) + "'");
522     }
523     return std::unique_ptr<ParseNode>();
524   }
525   std::unique_ptr<BinaryOpNode> binary_op = std::make_unique<BinaryOpNode>();
526   binary_op->set_op(token);
527   binary_op->set_left(std::move(left));
528   binary_op->set_right(std::move(right));
529   return binary_op;
530 }
531 
IdentifierOrCall(std::unique_ptr<ParseNode> left,const Token & token)532 std::unique_ptr<ParseNode> Parser::IdentifierOrCall(
533     std::unique_ptr<ParseNode> left,
534     const Token& token) {
535   std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
536   list->set_begin_token(token);
537   list->set_end(std::make_unique<EndNode>(token));
538   std::unique_ptr<BlockNode> block;
539   bool has_arg = false;
540   if (LookAhead(Token::LEFT_PAREN)) {
541     const Token& start_token = Consume();
542     // Parsing a function call.
543     has_arg = true;
544     if (Match(Token::RIGHT_PAREN)) {
545       // Nothing, just an empty call.
546     } else {
547       list = ParseList(start_token, Token::RIGHT_PAREN, false);
548       if (has_error())
549         return std::unique_ptr<ParseNode>();
550       Consume(Token::RIGHT_PAREN, "Expected ')' after call");
551     }
552     // Optionally with a scope.
553     if (LookAhead(Token::LEFT_BRACE)) {
554       block = ParseBlock(Consume(), BlockNode::DISCARDS_RESULT);
555       if (has_error())
556         return std::unique_ptr<ParseNode>();
557     }
558   }
559 
560   if (!left && !has_arg) {
561     // Not a function call, just a standalone identifier.
562     return std::make_unique<IdentifierNode>(token);
563   }
564   std::unique_ptr<FunctionCallNode> func_call =
565       std::make_unique<FunctionCallNode>();
566   func_call->set_function(token);
567   func_call->set_args(std::move(list));
568   if (block)
569     func_call->set_block(std::move(block));
570   return func_call;
571 }
572 
Assignment(std::unique_ptr<ParseNode> left,const Token & token)573 std::unique_ptr<ParseNode> Parser::Assignment(std::unique_ptr<ParseNode> left,
574                                               const Token& token) {
575   if (left->AsIdentifier() == nullptr && left->AsAccessor() == nullptr) {
576     *err_ = Err(left.get(),
577                 "The left-hand side of an assignment must be an identifier, "
578                 "scope access, or array access.");
579     return std::unique_ptr<ParseNode>();
580   }
581   std::unique_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
582   if (!value) {
583     if (!has_error())
584       *err_ = Err(token, "Expected right-hand side for assignment.");
585     return std::unique_ptr<ParseNode>();
586   }
587   std::unique_ptr<BinaryOpNode> assign = std::make_unique<BinaryOpNode>();
588   assign->set_op(token);
589   assign->set_left(std::move(left));
590   assign->set_right(std::move(value));
591   return assign;
592 }
593 
Subscript(std::unique_ptr<ParseNode> left,const Token & token)594 std::unique_ptr<ParseNode> Parser::Subscript(std::unique_ptr<ParseNode> left,
595                                              const Token& token) {
596   // TODO: Maybe support more complex expressions like a[0][0]. This would
597   // require work on the evaluator too.
598   if (left->AsIdentifier() == nullptr) {
599     *err_ = Err(
600         left.get(), "May only subscript identifiers.",
601         "The thing on the left hand side of the [] must be an identifier\n"
602         "and not an expression. If you need this, you'll have to assign the\n"
603         "value to a temporary before subscripting. Sorry.");
604     return std::unique_ptr<ParseNode>();
605   }
606   std::unique_ptr<ParseNode> value = ParseExpression();
607   Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
608   std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
609   accessor->set_base(left->AsIdentifier()->value());
610   accessor->set_subscript(std::move(value));
611   return accessor;
612 }
613 
DotOperator(std::unique_ptr<ParseNode> left,const Token & token)614 std::unique_ptr<ParseNode> Parser::DotOperator(std::unique_ptr<ParseNode> left,
615                                                const Token& token) {
616   if (left->AsIdentifier() == nullptr) {
617     *err_ = Err(
618         left.get(), "May only use \".\" for identifiers.",
619         "The thing on the left hand side of the dot must be an identifier\n"
620         "and not an expression. If you need this, you'll have to assign the\n"
621         "value to a temporary first. Sorry.");
622     return std::unique_ptr<ParseNode>();
623   }
624 
625   std::unique_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
626   if (!right || !right->AsIdentifier()) {
627     *err_ = Err(
628         token, "Expected identifier for right-hand-side of \".\"",
629         "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
630     return std::unique_ptr<ParseNode>();
631   }
632 
633   std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
634   accessor->set_base(left->AsIdentifier()->value());
635   accessor->set_member(std::unique_ptr<IdentifierNode>(
636       static_cast<IdentifierNode*>(right.release())));
637   return accessor;
638 }
639 
640 // Does not Consume the start or end token.
ParseList(const Token & start_token,Token::Type stop_before,bool allow_trailing_comma)641 std::unique_ptr<ListNode> Parser::ParseList(const Token& start_token,
642                                             Token::Type stop_before,
643                                             bool allow_trailing_comma) {
644   std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
645   list->set_begin_token(start_token);
646   bool just_got_comma = false;
647   bool first_time = true;
648   while (!LookAhead(stop_before)) {
649     if (!first_time) {
650       if (!just_got_comma) {
651         // Require commas separate things in lists.
652         *err_ = Err(cur_token(), "Expected comma between items.");
653         return std::unique_ptr<ListNode>();
654       }
655     }
656     first_time = false;
657 
658     // Why _OR? We're parsing things that are higher precedence than the ,
659     // that separates the items of the list. , should appear lower than
660     // boolean expressions (the lowest of which is OR), but above assignments.
661     list->append_item(ParseExpression(PRECEDENCE_OR));
662     if (has_error())
663       return std::unique_ptr<ListNode>();
664     if (at_end()) {
665       *err_ =
666           Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
667       return std::unique_ptr<ListNode>();
668     }
669     if (list->contents().back()->AsBlockComment()) {
670       // If there was a comment inside the list, we don't need a comma to the
671       // next item, so pretend we got one, if we're expecting one.
672       just_got_comma = allow_trailing_comma;
673     } else {
674       just_got_comma = Match(Token::COMMA);
675     }
676   }
677   if (just_got_comma && !allow_trailing_comma) {
678     *err_ = Err(cur_token(), "Trailing comma");
679     return std::unique_ptr<ListNode>();
680   }
681   list->set_end(std::make_unique<EndNode>(cur_token()));
682   return list;
683 }
684 
ParseFile()685 std::unique_ptr<ParseNode> Parser::ParseFile() {
686   std::unique_ptr<BlockNode> file =
687       std::make_unique<BlockNode>(BlockNode::DISCARDS_RESULT);
688   for (;;) {
689     if (at_end())
690       break;
691     std::unique_ptr<ParseNode> statement = ParseStatement();
692     if (!statement)
693       break;
694     file->append_statement(std::move(statement));
695   }
696   if (!at_end() && !has_error())
697     *err_ = Err(cur_token(), "Unexpected here, should be newline.");
698   if (has_error())
699     return std::unique_ptr<ParseNode>();
700 
701   // TODO(scottmg): If this is measurably expensive, it could be done only
702   // when necessary (when reformatting, or during tests). Comments are
703   // separate from the parse tree at this point, so downstream code can remain
704   // ignorant of them.
705   AssignComments(file.get());
706 
707   return file;
708 }
709 
ParseStatement()710 std::unique_ptr<ParseNode> Parser::ParseStatement() {
711   if (LookAhead(Token::IF)) {
712     return ParseCondition();
713   } else if (LookAhead(Token::BLOCK_COMMENT)) {
714     return BlockComment(Consume());
715   } else {
716     // TODO(scottmg): Is this too strict? Just drop all the testing if we want
717     // to allow "pointless" expressions and return ParseExpression() directly.
718     std::unique_ptr<ParseNode> stmt = ParseExpression();
719     if (stmt) {
720       if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
721         return stmt;
722     }
723     if (!has_error()) {
724       const Token& token = cur_or_last_token();
725       *err_ = Err(token, "Expecting assignment or function call.");
726     }
727     return std::unique_ptr<ParseNode>();
728   }
729 }
730 
ParseBlock(const Token & begin_brace,BlockNode::ResultMode result_mode)731 std::unique_ptr<BlockNode> Parser::ParseBlock(
732     const Token& begin_brace,
733     BlockNode::ResultMode result_mode) {
734   if (has_error())
735     return std::unique_ptr<BlockNode>();
736   std::unique_ptr<BlockNode> block = std::make_unique<BlockNode>(result_mode);
737   block->set_begin_token(begin_brace);
738 
739   for (;;) {
740     if (LookAhead(Token::RIGHT_BRACE)) {
741       block->set_end(std::make_unique<EndNode>(Consume()));
742       break;
743     }
744 
745     std::unique_ptr<ParseNode> statement = ParseStatement();
746     if (!statement)
747       return std::unique_ptr<BlockNode>();
748     block->append_statement(std::move(statement));
749   }
750   return block;
751 }
752 
ParseCondition()753 std::unique_ptr<ParseNode> Parser::ParseCondition() {
754   std::unique_ptr<ConditionNode> condition = std::make_unique<ConditionNode>();
755   condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
756   Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
757   condition->set_condition(ParseExpression());
758   if (IsAssignment(condition->condition()))
759     *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
760   Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
761   condition->set_if_true(ParseBlock(
762       Consume(Token::LEFT_BRACE, "Expected '{' to start 'if' block."),
763       BlockNode::DISCARDS_RESULT));
764   if (Match(Token::ELSE)) {
765     if (LookAhead(Token::LEFT_BRACE)) {
766       condition->set_if_false(
767           ParseBlock(Consume(), BlockNode::DISCARDS_RESULT));
768     } else if (LookAhead(Token::IF)) {
769       condition->set_if_false(ParseStatement());
770     } else {
771       *err_ = Err(cur_or_last_token(), "Expected '{' or 'if' after 'else'.");
772       return std::unique_ptr<ParseNode>();
773     }
774   }
775   if (has_error())
776     return std::unique_ptr<ParseNode>();
777   return condition;
778 }
779 
TraverseOrder(const ParseNode * root,std::vector<const ParseNode * > * pre,std::vector<const ParseNode * > * post)780 void Parser::TraverseOrder(const ParseNode* root,
781                            std::vector<const ParseNode*>* pre,
782                            std::vector<const ParseNode*>* post) {
783   if (root) {
784     pre->push_back(root);
785 
786     if (const AccessorNode* accessor = root->AsAccessor()) {
787       TraverseOrder(accessor->subscript(), pre, post);
788       TraverseOrder(accessor->member(), pre, post);
789     } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
790       TraverseOrder(binop->left(), pre, post);
791       TraverseOrder(binop->right(), pre, post);
792     } else if (const BlockNode* block = root->AsBlock()) {
793       for (const auto& statement : block->statements())
794         TraverseOrder(statement.get(), pre, post);
795       TraverseOrder(block->End(), pre, post);
796     } else if (const ConditionNode* condition = root->AsCondition()) {
797       TraverseOrder(condition->condition(), pre, post);
798       TraverseOrder(condition->if_true(), pre, post);
799       TraverseOrder(condition->if_false(), pre, post);
800     } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
801       TraverseOrder(func_call->args(), pre, post);
802       TraverseOrder(func_call->block(), pre, post);
803     } else if (root->AsIdentifier()) {
804       // Nothing.
805     } else if (const ListNode* list = root->AsList()) {
806       for (const auto& node : list->contents())
807         TraverseOrder(node.get(), pre, post);
808       TraverseOrder(list->End(), pre, post);
809     } else if (root->AsLiteral()) {
810       // Nothing.
811     } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
812       TraverseOrder(unaryop->operand(), pre, post);
813     } else if (root->AsBlockComment()) {
814       // Nothing.
815     } else if (root->AsEnd()) {
816       // Nothing.
817     } else {
818       CHECK(false) << "Unhandled case in TraverseOrder.";
819     }
820 
821     post->push_back(root);
822   }
823 }
824 
AssignComments(ParseNode * file)825 void Parser::AssignComments(ParseNode* file) {
826   // Start by generating a pre- and post- order traversal of the tree so we
827   // can determine what's before and after comments.
828   std::vector<const ParseNode*> pre;
829   std::vector<const ParseNode*> post;
830   TraverseOrder(file, &pre, &post);
831 
832   // Assign line comments to syntax immediately following.
833   int cur_comment = 0;
834   for (auto* node : pre) {
835     if (node->GetRange().is_null()) {
836       CHECK_EQ(node, file) << "Only expected on top file node";
837       continue;
838     }
839     const Location start = node->GetRange().begin();
840     while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
841       if (start >= line_comment_tokens_[cur_comment].location()) {
842         const_cast<ParseNode*>(node)->comments_mutable()->append_before(
843             line_comment_tokens_[cur_comment]);
844         ++cur_comment;
845       } else {
846         break;
847       }
848     }
849   }
850 
851   // Remaining line comments go at end of file.
852   for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
853        ++cur_comment)
854     file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
855 
856   // Assign suffix to syntax immediately before.
857   cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
858   for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
859        i != post.rend(); ++i) {
860     // Don't assign suffix comments to the function, list, or block, but instead
861     // to the last thing inside.
862     if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
863       continue;
864 
865     Location start = (*i)->GetRange().begin();
866     Location end = (*i)->GetRange().end();
867 
868     // Don't assign suffix comments to something that starts on an earlier
869     // line, so that in:
870     //
871     // sources = [ "a",
872     //     "b" ] # comment
873     //
874     // it's attached to "b", not sources = [ ... ].
875     if (start.line_number() != end.line_number())
876       continue;
877 
878     while (cur_comment >= 0) {
879       if (end <= suffix_comment_tokens_[cur_comment].location()) {
880         const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
881             suffix_comment_tokens_[cur_comment]);
882         --cur_comment;
883       } else {
884         break;
885       }
886     }
887 
888     // Suffix comments were assigned in reverse, so if there were multiple on
889     // the same node, they need to be reversed.
890     if ((*i)->comments() && !(*i)->comments()->suffix().empty())
891       const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
892   }
893 }
894 
IndentFor(int value)895 std::string IndentFor(int value) {
896   return std::string(value, ' ');
897 }
898 
RenderToText(const base::Value & node,int indent_level,std::ostringstream & os)899 void RenderToText(const base::Value& node,
900                   int indent_level,
901                   std::ostringstream& os) {
902   const base::Value* child = node.FindKey(std::string("child"));
903   std::string node_type(node.FindKey("type")->GetString());
904   if (node_type == "ACCESSOR") {
905     // AccessorNode is a bit special, in that it holds a Token, not a ParseNode
906     // for the base.
907     os << IndentFor(indent_level) << node_type << std::endl;
908     os << IndentFor(indent_level + 1) << node.FindKey("value")->GetString()
909        << std::endl;
910   } else {
911     os << IndentFor(indent_level) << node_type;
912     if (node.FindKey("value")) {
913       os << "(" << node.FindKey("value")->GetString() << ")";
914     }
915     os << std::endl;
916   }
917   if (node.FindKey(kJsonBeforeComment)) {
918     for (auto& v : node.FindKey(kJsonBeforeComment)->GetList()) {
919       os << IndentFor(indent_level + 1) << "+BEFORE_COMMENT(\"" << v.GetString()
920          << "\")\n";
921     }
922   }
923   if (node.FindKey(kJsonSuffixComment)) {
924     for (auto& v : node.FindKey(kJsonSuffixComment)->GetList()) {
925       os << IndentFor(indent_level + 1) << "+SUFFIX_COMMENT(\"" << v.GetString()
926          << "\")\n";
927     }
928   }
929   if (node.FindKey(kJsonAfterComment)) {
930     for (auto& v : node.FindKey(kJsonAfterComment)->GetList()) {
931       os << IndentFor(indent_level + 1) << "+AFTER_COMMENT(\"" << v.GetString()
932          << "\")\n";
933     }
934   }
935   if (child) {
936     for (const base::Value& n : child->GetList()) {
937       RenderToText(n, indent_level + 1, os);
938     }
939   }
940   const base::Value* end = node.FindKey("end");
941   if (end &&
942       (end->FindKey(kJsonBeforeComment) || end->FindKey(kJsonSuffixComment) ||
943        end->FindKey(kJsonAfterComment))) {
944     RenderToText(*end, indent_level + 1, os);
945   }
946 }
947