• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "gn/parser.h"
6 
7 #include <memory>
8 #include <utility>
9 
10 #include "base/logging.h"
11 #include "gn/functions.h"
12 #include "gn/operators.h"
13 #include "gn/token.h"
14 
15 const char kGrammar_Help[] =
16     R"*(Language and grammar for GN build files
17 
18 Tokens
19 
20   GN build files are read as sequences of tokens.  While splitting the file
21   into tokens, the next token is the longest sequence of characters that form a
22   valid token.
23 
24 White space and comments
25 
26   White space is comprised of spaces (U+0020), horizontal tabs (U+0009),
27   carriage returns (U+000D), and newlines (U+000A).
28 
29   Comments start at the character "#" and stop at the next newline.
30 
31   White space and comments are ignored except that they may separate tokens
32   that would otherwise combine into a single token.
33 
34 Identifiers
35 
36   Identifiers name variables and functions.
37 
38       identifier = letter { letter | digit } .
39       letter     = "A" ... "Z" | "a" ... "z" | "_" .
40       digit      = "0" ... "9" .
41 
42 Keywords
43 
44   The following keywords are reserved and may not be used as identifiers:
45 
46           else    false   if      true
47 
48 Integer literals
49 
50   An integer literal represents a decimal integer value.
51 
52       integer = [ "-" ] digit { digit } .
53 
54   Leading zeros and negative zero are disallowed.
55 
56 String literals
57 
58   A string literal represents a string value consisting of the quoted
59   characters with possible escape sequences and variable expansions.
60 
61       string           = `"` { char | escape | expansion } `"` .
62       escape           = `\` ( "$" | `"` | char ) .
63       BracketExpansion = "{" ( identifier | ArrayAccess | ScopeAccess "
64                          ") "}" .
65       Hex              = "0x" [0-9A-Fa-f][0-9A-Fa-f]
66       expansion        = "$" ( identifier | BracketExpansion | Hex ) .
67       char             = /* any character except "$", `"`, or newline "
68                         "*/ .
69 
70   After a backslash, certain sequences represent special characters:
71 
72           \"    U+0022    quotation mark
73           \$    U+0024    dollar sign
74           \\    U+005C    backslash
75 
76   All other backslashes represent themselves.
77 
78   To insert an arbitrary byte value, use $0xFF. For example, to insert a
79   newline character: "Line one$0x0ALine two".
80 
81   An expansion will evaluate the variable following the '$' and insert a
82   stringified version of it into the result. For example, to concat two path
83   components with a slash separating them:
84     "$var_one/$var_two"
85   Use the "${var_one}" format to be explicitly deliniate the variable for
86   otherwise-ambiguous cases.
87 
88 Punctuation
89 
90   The following character sequences represent punctuation:
91 
92           +       +=      ==      !=      (       )
93           -       -=      <       <=      [       ]
94           !       =       >       >=      {       }
95                           &&      ||      .       ,
96 
97 Grammar
98 
99   The input tokens form a syntax tree following a context-free grammar:
100 
101       File = StatementList .
102 
103       Statement     = Assignment | Call | Condition .
104       LValue        = identifier | ArrayAccess | ScopeAccess .
105       Assignment    = LValue AssignOp Expr .
106       Call          = identifier "(" [ ExprList ] ")" [ Block ] .
107       Condition     = "if" "(" Expr ")" Block
108                       [ "else" ( Condition | Block ) ] .
109       Block         = "{" StatementList "}" .
110       StatementList = { Statement } .
111 
112       ArrayAccess = identifier "[" Expr "]" .
113       ScopeAccess = identifier "." identifier .
114       Expr        = UnaryExpr | Expr BinaryOp Expr .
115       UnaryExpr   = PrimaryExpr | UnaryOp UnaryExpr .
116       PrimaryExpr = identifier | integer | string | Call
117                   | ArrayAccess | ScopeAccess | Block
118                   | "(" Expr ")"
119                   | "[" [ ExprList [ "," ] ] "]" .
120       ExprList    = Expr { "," Expr } .
121 
122       AssignOp = "=" | "+=" | "-=" .
123       UnaryOp  = "!" .
124       BinaryOp = "+" | "-"                  // highest priority
125                | "<" | "<=" | ">" | ">="
126                | "==" | "!="
127                | "&&"
128                | "||" .                     // lowest priority
129 
130   All binary operators are left-associative.
131 
132 Types
133 
134   The GN language is dynamically typed. The following types are used:
135 
136    - Boolean: Uses the keywords "true" and "false". There is no implicit
137      conversion between booleans and integers.
138 
139    - Integers: All numbers in GN are signed 64-bit integers.
140 
141    - Strings: Strings are 8-bit with no enforced encoding. When a string is
142      used to interact with other systems with particular encodings (like the
143      Windows and Mac filesystems) it is assumed to be UTF-8. See "String
144      literals" above for more.
145 
146    - Lists: Lists are arbitrary-length ordered lists of values. See "Lists"
147      below for more.
148 
149    - Scopes: Scopes are like dictionaries that use variable names for keys. See
150      "Scopes" below for more.
151 
152 Lists
153 
154   Lists are created with [] and using commas to separate items:
155 
156        mylist = [ 0, 1, 2, "some string" ]
157 
158   A comma after the last item is optional. Lists are dereferenced using 0-based
159   indexing:
160 
161        mylist[0] += 1
162        var = mylist[2]
163 
164   Lists can be concatenated using the '+' and '+=' operators. Bare values can
165   not be concatenated with lists, to add a single item, it must be put into a
166   list of length one.
167 
168   Items can be removed from lists using the '-' and '-=' operators. This will
169   remove all occurrences of every item in the right-hand list from the
170   left-hand list. It is an error to remove an item not in the list. This is to
171   prevent common typos and to detect dead code that is removing things that no
172   longer apply.
173 
174   It is an error to use '=' to replace a nonempty list with another nonempty
175   list. This is to prevent accidentally overwriting data when in most cases
176   '+=' was intended. To overwrite a list on purpose, first assign it to the
177   empty list:
178 
179     mylist = []
180     mylist = otherlist
181 
182   When assigning to a list named 'sources' using '=' or '+=', list items may be
183   automatically filtered out. See "gn help set_sources_assignment_filter" for
184   more.
185 
186 Scopes
187 
188   All execution happens in the context of a scope which holds the current state
189   (like variables). With the exception of loops and conditions, '{' introduces
190   a new scope that has a parent reference to the old scope.
191 
192   Variable reads recursively search all nested scopes until the variable is
193   found or there are no more scopes. Variable writes always go into the current
194   scope. This means that after the closing '}' (again excepting loops and
195   conditions), all local variables will be restored to the previous values.
196   This also means that "foo = foo" can do useful work by copying a variable
197   into the current scope that was defined in a containing scope.
198 
199   Scopes can also be assigned to variables. Such scopes can be created by
200   functions like exec_script, when invoking a template (the template code
201   refers to the variables set by the invoking code by the implicitly-created
202   "invoker" scope), or explicitly like:
203 
204     empty_scope = {}
205     myvalues = {
206       foo = 21
207       bar = "something"
208     }
209 
210   Inside such a scope definition can be any GN code including conditionals and
211   function calls. After the close of the scope, it will contain all variables
212   explicitly set by the code contained inside it. After this, the values can be
213   read, modified, or added to:
214 
215     myvalues.foo += 2
216     empty_scope.new_thing = [ 1, 2, 3 ]
217 
218   Scope equality is defined as single-level scopes identical within the current
219   scope. That is, all values in the first scope must be present and identical
220   within the second, and vice versa. Note that this means inherited scopes are
221   always unequal by definition.
222 )*";
223 
224 // Precedence constants.
225 //
226 // Currently all operators are left-associative so this list is sequential. To
227 // implement a right-assocative operators in a Pratt parser we would leave gaps
228 // in between the constants, and right-associative operators get a precedence
229 // of "<left-associated-precedence> - 1".
230 enum Precedence {
231   PRECEDENCE_ASSIGNMENT = 1,  // Lowest precedence.
232   PRECEDENCE_OR = 2,
233   PRECEDENCE_AND = 3,
234   PRECEDENCE_EQUALITY = 4,
235   PRECEDENCE_RELATION = 5,
236   PRECEDENCE_SUM = 6,
237   PRECEDENCE_PREFIX = 7,
238   PRECEDENCE_CALL = 8,
239   PRECEDENCE_DOT = 9,  // Highest precedence.
240 };
241 
242 // The top-level for blocks/ifs is recursive descent, the expression parser is
243 // a Pratt parser. The basic idea there is to have the precedences (and
244 // associativities) encoded relative to each other and only parse up until you
245 // hit something of that precedence. There's a dispatch table in expressions_
246 // at the top of parser.cc that describes how each token dispatches if it's
247 // seen as either a prefix or infix operator, and if it's infix, what its
248 // precedence is.
249 //
250 // References:
251 // http://javascript.crockford.com/tdop/tdop.html
252 // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
253 
254 // Indexed by Token::Type.
255 ParserHelper Parser::expressions_[] = {
256     {nullptr, nullptr, -1},                                   // INVALID
257     {&Parser::Literal, nullptr, -1},                          // INTEGER
258     {&Parser::Literal, nullptr, -1},                          // STRING
259     {&Parser::Literal, nullptr, -1},                          // TRUE_TOKEN
260     {&Parser::Literal, nullptr, -1},                          // FALSE_TOKEN
261     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // EQUAL
262     {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // PLUS
263     {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM},       // MINUS
264     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // PLUS_EQUALS
265     {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT},    // MINUS_EQUALS
266     {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // EQUAL_EQUAL
267     {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY},  // NOT_EQUAL
268     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_EQUAL
269     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_EQUAL
270     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // LESS_THAN
271     {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION},  // GREATER_THAN
272     {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND},       // BOOLEAN_AND
273     {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR},        // BOOLEAN_OR
274     {&Parser::Not, nullptr, -1},                              // BANG
275     {nullptr, &Parser::DotOperator, PRECEDENCE_DOT},          // DOT
276     {&Parser::Group, nullptr, -1},                            // LEFT_PAREN
277     {nullptr, nullptr, -1},                                   // RIGHT_PAREN
278     {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL},     // LEFT_BRACKET
279     {nullptr, nullptr, -1},                                   // RIGHT_BRACKET
280     {&Parser::Block, nullptr, -1},                            // LEFT_BRACE
281     {nullptr, nullptr, -1},                                   // RIGHT_BRACE
282     {nullptr, nullptr, -1},                                   // IF
283     {nullptr, nullptr, -1},                                   // ELSE
284     {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL},  // IDENTIFIER
285     {nullptr, nullptr, -1},                                       // COMMA
286     {nullptr, nullptr, -1},                // UNCLASSIFIED_COMMENT
287     {nullptr, nullptr, -1},                // LINE_COMMENT
288     {nullptr, nullptr, -1},                // SUFFIX_COMMENT
289     {&Parser::BlockComment, nullptr, -1},  // BLOCK_COMMENT
290 };
291 
Parser(const std::vector<Token> & tokens,Err * err)292 Parser::Parser(const std::vector<Token>& tokens, Err* err)
293     : invalid_token_(Location(), Token::INVALID, std::string_view()),
294       err_(err),
295       cur_(0) {
296   for (const auto& token : tokens) {
297     switch (token.type()) {
298       case Token::LINE_COMMENT:
299         line_comment_tokens_.push_back(token);
300         break;
301       case Token::SUFFIX_COMMENT:
302         suffix_comment_tokens_.push_back(token);
303         break;
304       default:
305         // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
306         // through the real parser.
307         tokens_.push_back(token);
308         break;
309     }
310   }
311 }
312 
313 Parser::~Parser() = default;
314 
315 // static
Parse(const std::vector<Token> & tokens,Err * err)316 std::unique_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
317                                          Err* err) {
318   Parser p(tokens, err);
319   return p.ParseFile();
320 }
321 
322 // static
ParseExpression(const std::vector<Token> & tokens,Err * err)323 std::unique_ptr<ParseNode> Parser::ParseExpression(
324     const std::vector<Token>& tokens,
325     Err* err) {
326   Parser p(tokens, err);
327   std::unique_ptr<ParseNode> expr = p.ParseExpression();
328   if (!p.at_end() && !err->has_error()) {
329     *err = Err(p.cur_token(), "Trailing garbage");
330     return nullptr;
331   }
332   return expr;
333 }
334 
335 // static
ParseValue(const std::vector<Token> & tokens,Err * err)336 std::unique_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
337                                               Err* err) {
338   for (const Token& token : tokens) {
339     switch (token.type()) {
340       case Token::INTEGER:
341       case Token::STRING:
342       case Token::TRUE_TOKEN:
343       case Token::FALSE_TOKEN:
344       case Token::LEFT_BRACKET:
345       case Token::RIGHT_BRACKET:
346       case Token::COMMA:
347         continue;
348       default:
349         *err = Err(token, "Invalid token in literal value");
350         return nullptr;
351     }
352   }
353 
354   return ParseExpression(tokens, err);
355 }
356 
IsAssignment(const ParseNode * node) const357 bool Parser::IsAssignment(const ParseNode* node) const {
358   return node && node->AsBinaryOp() &&
359          (node->AsBinaryOp()->op().type() == Token::EQUAL ||
360           node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
361           node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
362 }
363 
IsStatementBreak(Token::Type token_type) const364 bool Parser::IsStatementBreak(Token::Type token_type) const {
365   switch (token_type) {
366     case Token::IDENTIFIER:
367     case Token::LEFT_BRACE:
368     case Token::RIGHT_BRACE:
369     case Token::IF:
370     case Token::ELSE:
371       return true;
372     default:
373       return false;
374   }
375 }
376 
LookAhead(Token::Type type)377 bool Parser::LookAhead(Token::Type type) {
378   if (at_end())
379     return false;
380   return cur_token().type() == type;
381 }
382 
Match(Token::Type type)383 bool Parser::Match(Token::Type type) {
384   if (!LookAhead(type))
385     return false;
386   Consume();
387   return true;
388 }
389 
Consume(Token::Type type,const char * error_message)390 const Token& Parser::Consume(Token::Type type, const char* error_message) {
391   Token::Type types[1] = {type};
392   return Consume(types, 1, error_message);
393 }
394 
Consume(Token::Type * types,size_t num_types,const char * error_message)395 const Token& Parser::Consume(Token::Type* types,
396                              size_t num_types,
397                              const char* error_message) {
398   if (has_error()) {
399     // Don't overwrite current error, but make progress through tokens so that
400     // a loop that's expecting a particular token will still terminate.
401     if (!at_end())
402       cur_++;
403     return invalid_token_;
404   }
405   if (at_end()) {
406     const char kEOFMsg[] = "I hit EOF instead.";
407     if (tokens_.empty())
408       *err_ = Err(Location(), error_message, kEOFMsg);
409     else
410       *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
411     return invalid_token_;
412   }
413 
414   for (size_t i = 0; i < num_types; ++i) {
415     if (cur_token().type() == types[i])
416       return Consume();
417   }
418   *err_ = Err(cur_token(), error_message);
419   return invalid_token_;
420 }
421 
Consume()422 const Token& Parser::Consume() {
423   return tokens_[cur_++];
424 }
425 
ParseExpression()426 std::unique_ptr<ParseNode> Parser::ParseExpression() {
427   return ParseExpression(0);
428 }
429 
ParseExpression(int precedence)430 std::unique_ptr<ParseNode> Parser::ParseExpression(int precedence) {
431   if (at_end())
432     return std::unique_ptr<ParseNode>();
433 
434   const Token& token = Consume();
435   PrefixFunc prefix = expressions_[token.type()].prefix;
436 
437   if (prefix == nullptr) {
438     *err_ = Err(token, "Unexpected token '" + std::string(token.value()) + "'");
439     return std::unique_ptr<ParseNode>();
440   }
441 
442   std::unique_ptr<ParseNode> left = (this->*prefix)(token);
443   if (has_error())
444     return left;
445 
446   while (!at_end() && !IsStatementBreak(cur_token().type()) &&
447          precedence <= expressions_[cur_token().type()].precedence) {
448     const Token& next_token = Consume();
449     InfixFunc infix = expressions_[next_token.type()].infix;
450     if (infix == nullptr) {
451       *err_ = Err(next_token,
452                   "Unexpected token '" + std::string(next_token.value()) + "'");
453       return std::unique_ptr<ParseNode>();
454     }
455     left = (this->*infix)(std::move(left), next_token);
456     if (has_error())
457       return std::unique_ptr<ParseNode>();
458   }
459 
460   return left;
461 }
462 
Block(const Token & token)463 std::unique_ptr<ParseNode> Parser::Block(const Token& token) {
464   // This entrypoint into ParseBlock means it's part of an expression and we
465   // always want the result.
466   return ParseBlock(token, BlockNode::RETURNS_SCOPE);
467 }
468 
Literal(const Token & token)469 std::unique_ptr<ParseNode> Parser::Literal(const Token& token) {
470   return std::make_unique<LiteralNode>(token);
471 }
472 
Name(const Token & token)473 std::unique_ptr<ParseNode> Parser::Name(const Token& token) {
474   return IdentifierOrCall(std::unique_ptr<ParseNode>(), token);
475 }
476 
BlockComment(const Token & token)477 std::unique_ptr<ParseNode> Parser::BlockComment(const Token& token) {
478   std::unique_ptr<BlockCommentNode> comment =
479       std::make_unique<BlockCommentNode>();
480   comment->set_comment(token);
481   return std::move(comment);
482 }
483 
Group(const Token & token)484 std::unique_ptr<ParseNode> Parser::Group(const Token& token) {
485   std::unique_ptr<ParseNode> expr = ParseExpression();
486   if (has_error())
487     return std::unique_ptr<ParseNode>();
488   Consume(Token::RIGHT_PAREN, "Expected ')'");
489   return expr;
490 }
491 
Not(const Token & token)492 std::unique_ptr<ParseNode> Parser::Not(const Token& token) {
493   std::unique_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
494   if (has_error())
495     return std::unique_ptr<ParseNode>();
496   if (!expr) {
497     if (!has_error())
498       *err_ = Err(token, "Expected right-hand side for '!'.");
499     return std::unique_ptr<ParseNode>();
500   }
501   std::unique_ptr<UnaryOpNode> unary_op = std::make_unique<UnaryOpNode>();
502   unary_op->set_op(token);
503   unary_op->set_operand(std::move(expr));
504   return std::move(unary_op);
505 }
506 
List(const Token & node)507 std::unique_ptr<ParseNode> Parser::List(const Token& node) {
508   std::unique_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
509   if (!has_error() && !at_end())
510     Consume(Token::RIGHT_BRACKET, "Expected ']'");
511   return list;
512 }
513 
BinaryOperator(std::unique_ptr<ParseNode> left,const Token & token)514 std::unique_ptr<ParseNode> Parser::BinaryOperator(
515     std::unique_ptr<ParseNode> left,
516     const Token& token) {
517   std::unique_ptr<ParseNode> right =
518       ParseExpression(expressions_[token.type()].precedence + 1);
519   if (!right) {
520     if (!has_error()) {
521       *err_ = Err(token, "Expected right-hand side for '" +
522                              std::string(token.value()) + "'");
523     }
524     return std::unique_ptr<ParseNode>();
525   }
526   std::unique_ptr<BinaryOpNode> binary_op = std::make_unique<BinaryOpNode>();
527   binary_op->set_op(token);
528   binary_op->set_left(std::move(left));
529   binary_op->set_right(std::move(right));
530   return std::move(binary_op);
531 }
532 
IdentifierOrCall(std::unique_ptr<ParseNode> left,const Token & token)533 std::unique_ptr<ParseNode> Parser::IdentifierOrCall(
534     std::unique_ptr<ParseNode> left,
535     const Token& token) {
536   std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
537   list->set_begin_token(token);
538   list->set_end(std::make_unique<EndNode>(token));
539   std::unique_ptr<BlockNode> block;
540   bool has_arg = false;
541   if (LookAhead(Token::LEFT_PAREN)) {
542     const Token& start_token = Consume();
543     // Parsing a function call.
544     has_arg = true;
545     if (Match(Token::RIGHT_PAREN)) {
546       // Nothing, just an empty call.
547     } else {
548       list = ParseList(start_token, Token::RIGHT_PAREN, false);
549       if (has_error())
550         return std::unique_ptr<ParseNode>();
551       Consume(Token::RIGHT_PAREN, "Expected ')' after call");
552     }
553     // Optionally with a scope.
554     if (LookAhead(Token::LEFT_BRACE)) {
555       block = ParseBlock(Consume(), BlockNode::DISCARDS_RESULT);
556       if (has_error())
557         return std::unique_ptr<ParseNode>();
558     }
559   }
560 
561   if (!left && !has_arg) {
562     // Not a function call, just a standalone identifier.
563     return std::make_unique<IdentifierNode>(token);
564   }
565   std::unique_ptr<FunctionCallNode> func_call =
566       std::make_unique<FunctionCallNode>();
567   func_call->set_function(token);
568   func_call->set_args(std::move(list));
569   if (block)
570     func_call->set_block(std::move(block));
571   return std::move(func_call);
572 }
573 
Assignment(std::unique_ptr<ParseNode> left,const Token & token)574 std::unique_ptr<ParseNode> Parser::Assignment(std::unique_ptr<ParseNode> left,
575                                               const Token& token) {
576   if (left->AsIdentifier() == nullptr && left->AsAccessor() == nullptr) {
577     *err_ = Err(left.get(),
578                 "The left-hand side of an assignment must be an identifier, "
579                 "scope access, or array access.");
580     return std::unique_ptr<ParseNode>();
581   }
582   std::unique_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
583   if (!value) {
584     if (!has_error())
585       *err_ = Err(token, "Expected right-hand side for assignment.");
586     return std::unique_ptr<ParseNode>();
587   }
588   std::unique_ptr<BinaryOpNode> assign = std::make_unique<BinaryOpNode>();
589   assign->set_op(token);
590   assign->set_left(std::move(left));
591   assign->set_right(std::move(value));
592   return std::move(assign);
593 }
594 
Subscript(std::unique_ptr<ParseNode> left,const Token & token)595 std::unique_ptr<ParseNode> Parser::Subscript(std::unique_ptr<ParseNode> left,
596                                              const Token& token) {
597   // TODO: Maybe support more complex expressions like a[0][0]. This would
598   // require work on the evaluator too.
599   if (left->AsIdentifier() == nullptr) {
600     *err_ = Err(
601         left.get(), "May only subscript identifiers.",
602         "The thing on the left hand side of the [] must be an identifier\n"
603         "and not an expression. If you need this, you'll have to assign the\n"
604         "value to a temporary before subscripting. Sorry.");
605     return std::unique_ptr<ParseNode>();
606   }
607   std::unique_ptr<ParseNode> value = ParseExpression();
608   Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
609   std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
610   accessor->set_base(left->AsIdentifier()->value());
611   accessor->set_subscript(std::move(value));
612   return std::move(accessor);
613 }
614 
DotOperator(std::unique_ptr<ParseNode> left,const Token & token)615 std::unique_ptr<ParseNode> Parser::DotOperator(std::unique_ptr<ParseNode> left,
616                                                const Token& token) {
617   if (left->AsIdentifier() == nullptr) {
618     *err_ = Err(
619         left.get(), "May only use \".\" for identifiers.",
620         "The thing on the left hand side of the dot must be an identifier\n"
621         "and not an expression. If you need this, you'll have to assign the\n"
622         "value to a temporary first. Sorry.");
623     return std::unique_ptr<ParseNode>();
624   }
625 
626   std::unique_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
627   if (!right || !right->AsIdentifier()) {
628     *err_ = Err(
629         token, "Expected identifier for right-hand-side of \".\"",
630         "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
631     return std::unique_ptr<ParseNode>();
632   }
633 
634   std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
635   accessor->set_base(left->AsIdentifier()->value());
636   accessor->set_member(std::unique_ptr<IdentifierNode>(
637       static_cast<IdentifierNode*>(right.release())));
638   return std::move(accessor);
639 }
640 
641 // Does not Consume the start or end token.
ParseList(const Token & start_token,Token::Type stop_before,bool allow_trailing_comma)642 std::unique_ptr<ListNode> Parser::ParseList(const Token& start_token,
643                                             Token::Type stop_before,
644                                             bool allow_trailing_comma) {
645   std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
646   list->set_begin_token(start_token);
647   bool just_got_comma = false;
648   bool first_time = true;
649   while (!LookAhead(stop_before)) {
650     if (!first_time) {
651       if (!just_got_comma) {
652         // Require commas separate things in lists.
653         *err_ = Err(cur_token(), "Expected comma between items.");
654         return std::unique_ptr<ListNode>();
655       }
656     }
657     first_time = false;
658 
659     // Why _OR? We're parsing things that are higher precedence than the ,
660     // that separates the items of the list. , should appear lower than
661     // boolean expressions (the lowest of which is OR), but above assignments.
662     list->append_item(ParseExpression(PRECEDENCE_OR));
663     if (has_error())
664       return std::unique_ptr<ListNode>();
665     if (at_end()) {
666       *err_ =
667           Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
668       return std::unique_ptr<ListNode>();
669     }
670     if (list->contents().back()->AsBlockComment()) {
671       // If there was a comment inside the list, we don't need a comma to the
672       // next item, so pretend we got one, if we're expecting one.
673       just_got_comma = allow_trailing_comma;
674     } else {
675       just_got_comma = Match(Token::COMMA);
676     }
677   }
678   if (just_got_comma && !allow_trailing_comma) {
679     *err_ = Err(cur_token(), "Trailing comma");
680     return std::unique_ptr<ListNode>();
681   }
682   list->set_end(std::make_unique<EndNode>(cur_token()));
683   return list;
684 }
685 
ParseFile()686 std::unique_ptr<ParseNode> Parser::ParseFile() {
687   std::unique_ptr<BlockNode> file =
688       std::make_unique<BlockNode>(BlockNode::DISCARDS_RESULT);
689   for (;;) {
690     if (at_end())
691       break;
692     std::unique_ptr<ParseNode> statement = ParseStatement();
693     if (!statement)
694       break;
695     file->append_statement(std::move(statement));
696   }
697   if (!at_end() && !has_error())
698     *err_ = Err(cur_token(), "Unexpected here, should be newline.");
699   if (has_error())
700     return std::unique_ptr<ParseNode>();
701 
702   // TODO(scottmg): If this is measurably expensive, it could be done only
703   // when necessary (when reformatting, or during tests). Comments are
704   // separate from the parse tree at this point, so downstream code can remain
705   // ignorant of them.
706   AssignComments(file.get());
707 
708   return std::move(file);
709 }
710 
ParseStatement()711 std::unique_ptr<ParseNode> Parser::ParseStatement() {
712   if (LookAhead(Token::IF)) {
713     return ParseCondition();
714   } else if (LookAhead(Token::BLOCK_COMMENT)) {
715     return BlockComment(Consume());
716   } else {
717     // TODO(scottmg): Is this too strict? Just drop all the testing if we want
718     // to allow "pointless" expressions and return ParseExpression() directly.
719     std::unique_ptr<ParseNode> stmt = ParseExpression();
720     if (stmt) {
721       if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
722         return stmt;
723     }
724     if (!has_error()) {
725       const Token& token = cur_or_last_token();
726       *err_ = Err(token, "Expecting assignment or function call.");
727     }
728     return std::unique_ptr<ParseNode>();
729   }
730 }
731 
ParseBlock(const Token & begin_brace,BlockNode::ResultMode result_mode)732 std::unique_ptr<BlockNode> Parser::ParseBlock(
733     const Token& begin_brace,
734     BlockNode::ResultMode result_mode) {
735   if (has_error())
736     return std::unique_ptr<BlockNode>();
737   std::unique_ptr<BlockNode> block = std::make_unique<BlockNode>(result_mode);
738   block->set_begin_token(begin_brace);
739 
740   for (;;) {
741     if (LookAhead(Token::RIGHT_BRACE)) {
742       block->set_end(std::make_unique<EndNode>(Consume()));
743       break;
744     }
745 
746     std::unique_ptr<ParseNode> statement = ParseStatement();
747     if (!statement)
748       return std::unique_ptr<BlockNode>();
749     block->append_statement(std::move(statement));
750   }
751   return block;
752 }
753 
ParseCondition()754 std::unique_ptr<ParseNode> Parser::ParseCondition() {
755   std::unique_ptr<ConditionNode> condition = std::make_unique<ConditionNode>();
756   condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
757   Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
758   condition->set_condition(ParseExpression());
759   if (IsAssignment(condition->condition()))
760     *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
761   Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
762   condition->set_if_true(ParseBlock(
763       Consume(Token::LEFT_BRACE, "Expected '{' to start 'if' block."),
764       BlockNode::DISCARDS_RESULT));
765   if (Match(Token::ELSE)) {
766     if (LookAhead(Token::LEFT_BRACE)) {
767       condition->set_if_false(
768           ParseBlock(Consume(), BlockNode::DISCARDS_RESULT));
769     } else if (LookAhead(Token::IF)) {
770       condition->set_if_false(ParseStatement());
771     } else {
772       *err_ = Err(cur_or_last_token(), "Expected '{' or 'if' after 'else'.");
773       return std::unique_ptr<ParseNode>();
774     }
775   }
776   if (has_error())
777     return std::unique_ptr<ParseNode>();
778   return std::move(condition);
779 }
780 
TraverseOrder(const ParseNode * root,std::vector<const ParseNode * > * pre,std::vector<const ParseNode * > * post)781 void Parser::TraverseOrder(const ParseNode* root,
782                            std::vector<const ParseNode*>* pre,
783                            std::vector<const ParseNode*>* post) {
784   if (root) {
785     pre->push_back(root);
786 
787     if (const AccessorNode* accessor = root->AsAccessor()) {
788       TraverseOrder(accessor->subscript(), pre, post);
789       TraverseOrder(accessor->member(), pre, post);
790     } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
791       TraverseOrder(binop->left(), pre, post);
792       TraverseOrder(binop->right(), pre, post);
793     } else if (const BlockNode* block = root->AsBlock()) {
794       for (const auto& statement : block->statements())
795         TraverseOrder(statement.get(), pre, post);
796       TraverseOrder(block->End(), pre, post);
797     } else if (const ConditionNode* condition = root->AsConditionNode()) {
798       TraverseOrder(condition->condition(), pre, post);
799       TraverseOrder(condition->if_true(), pre, post);
800       TraverseOrder(condition->if_false(), pre, post);
801     } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
802       TraverseOrder(func_call->args(), pre, post);
803       TraverseOrder(func_call->block(), pre, post);
804     } else if (root->AsIdentifier()) {
805       // Nothing.
806     } else if (const ListNode* list = root->AsList()) {
807       for (const auto& node : list->contents())
808         TraverseOrder(node.get(), pre, post);
809       TraverseOrder(list->End(), pre, post);
810     } else if (root->AsLiteral()) {
811       // Nothing.
812     } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
813       TraverseOrder(unaryop->operand(), pre, post);
814     } else if (root->AsBlockComment()) {
815       // Nothing.
816     } else if (root->AsEnd()) {
817       // Nothing.
818     } else {
819       CHECK(false) << "Unhandled case in TraverseOrder.";
820     }
821 
822     post->push_back(root);
823   }
824 }
825 
AssignComments(ParseNode * file)826 void Parser::AssignComments(ParseNode* file) {
827   // Start by generating a pre- and post- order traversal of the tree so we
828   // can determine what's before and after comments.
829   std::vector<const ParseNode*> pre;
830   std::vector<const ParseNode*> post;
831   TraverseOrder(file, &pre, &post);
832 
833   // Assign line comments to syntax immediately following.
834   int cur_comment = 0;
835   for (auto* node : pre) {
836     if (node->GetRange().is_null()) {
837       CHECK_EQ(node, file) << "Only expected on top file node";
838       continue;
839     }
840     const Location start = node->GetRange().begin();
841     while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
842       if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
843         const_cast<ParseNode*>(node)->comments_mutable()->append_before(
844             line_comment_tokens_[cur_comment]);
845         ++cur_comment;
846       } else {
847         break;
848       }
849     }
850   }
851 
852   // Remaining line comments go at end of file.
853   for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
854        ++cur_comment)
855     file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
856 
857   // Assign suffix to syntax immediately before.
858   cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
859   for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
860        i != post.rend(); ++i) {
861     // Don't assign suffix comments to the function, list, or block, but instead
862     // to the last thing inside.
863     if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
864       continue;
865 
866     Location start = (*i)->GetRange().begin();
867     Location end = (*i)->GetRange().end();
868 
869     // Don't assign suffix comments to something that starts on an earlier
870     // line, so that in:
871     //
872     // sources = [ "a",
873     //     "b" ] # comment
874     //
875     // it's attached to "b", not sources = [ ... ].
876     if (start.line_number() != end.line_number())
877       continue;
878 
879     while (cur_comment >= 0) {
880       if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
881         const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
882             suffix_comment_tokens_[cur_comment]);
883         --cur_comment;
884       } else {
885         break;
886       }
887     }
888 
889     // Suffix comments were assigned in reverse, so if there were multiple on
890     // the same node, they need to be reversed.
891     if ((*i)->comments() && !(*i)->comments()->suffix().empty())
892       const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
893   }
894 }
895 
IndentFor(int value)896 std::string IndentFor(int value) {
897   return std::string(value, ' ');
898 }
899 
RenderToText(const base::Value & node,int indent_level,std::ostringstream & os)900 void RenderToText(const base::Value& node,
901                   int indent_level,
902                   std::ostringstream& os) {
903   const base::Value* child = node.FindKey(std::string("child"));
904   std::string node_type(node.FindKey("type")->GetString());
905   if (node_type == "ACCESSOR") {
906     // AccessorNode is a bit special, in that it holds a Token, not a ParseNode
907     // for the base.
908     os << IndentFor(indent_level) << node_type << std::endl;
909     os << IndentFor(indent_level + 1) << node.FindKey("value")->GetString()
910        << std::endl;
911   } else {
912     os << IndentFor(indent_level) << node_type;
913     if (node.FindKey("value")) {
914       os << "(" << node.FindKey("value")->GetString() << ")";
915     }
916     os << std::endl;
917   }
918   if (node.FindKey(kJsonBeforeComment)) {
919     for (auto& v : node.FindKey(kJsonBeforeComment)->GetList()) {
920       os << IndentFor(indent_level + 1) << "+BEFORE_COMMENT(\"" << v.GetString()
921          << "\")\n";
922     }
923   }
924   if (node.FindKey(kJsonSuffixComment)) {
925     for (auto& v : node.FindKey(kJsonSuffixComment)->GetList()) {
926       os << IndentFor(indent_level + 1) << "+SUFFIX_COMMENT(\"" << v.GetString()
927          << "\")\n";
928     }
929   }
930   if (node.FindKey(kJsonAfterComment)) {
931     for (auto& v : node.FindKey(kJsonAfterComment)->GetList()) {
932       os << IndentFor(indent_level + 1) << "+AFTER_COMMENT(\"" << v.GetString()
933          << "\")\n";
934     }
935   }
936   if (child) {
937     for (const base::Value& n : child->GetList()) {
938       RenderToText(n, indent_level + 1, os);
939     }
940   }
941 }
942