1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "gn/parser.h"
6
7 #include <memory>
8 #include <utility>
9
10 #include "base/logging.h"
11 #include "gn/functions.h"
12 #include "gn/operators.h"
13 #include "gn/token.h"
14
15 const char kGrammar_Help[] =
16 R"*(Language and grammar for GN build files
17
18 Tokens
19
20 GN build files are read as sequences of tokens. While splitting the file
21 into tokens, the next token is the longest sequence of characters that form a
22 valid token.
23
24 White space and comments
25
26 White space is comprised of spaces (U+0020), horizontal tabs (U+0009),
27 carriage returns (U+000D), and newlines (U+000A).
28
29 Comments start at the character "#" and stop at the next newline.
30
31 White space and comments are ignored except that they may separate tokens
32 that would otherwise combine into a single token.
33
34 Identifiers
35
36 Identifiers name variables and functions.
37
38 identifier = letter { letter | digit } .
39 letter = "A" ... "Z" | "a" ... "z" | "_" .
40 digit = "0" ... "9" .
41
42 Keywords
43
44 The following keywords are reserved and may not be used as identifiers:
45
46 else false if true
47
48 Integer literals
49
50 An integer literal represents a decimal integer value.
51
52 integer = [ "-" ] digit { digit } .
53
54 Leading zeros and negative zero are disallowed.
55
56 String literals
57
58 A string literal represents a string value consisting of the quoted
59 characters with possible escape sequences and variable expansions.
60
61 string = `"` { char | escape | expansion } `"` .
62 escape = `\` ( "$" | `"` | char ) .
63 BracketExpansion = "{" ( identifier | ArrayAccess | ScopeAccess ) "}" .
64 Hex = "0x" [0-9A-Fa-f][0-9A-Fa-f]
65 expansion = "$" ( identifier | BracketExpansion | Hex ) .
66 char = /* any character except "$", `"`, or newline */ .
67
68 After a backslash, certain sequences represent special characters:
69
70 \" U+0022 quotation mark
71 \$ U+0024 dollar sign
72 \\ U+005C backslash
73
74 All other backslashes represent themselves.
75
76 To insert an arbitrary byte value, use $0xFF. For example, to insert a
77 newline character: "Line one$0x0ALine two".
78
79 An expansion will evaluate the variable following the '$' and insert a
80 stringified version of it into the result. For example, to concat two path
81 components with a slash separating them:
82 "$var_one/$var_two"
83 Use the "${var_one}" format to be explicitly deliniate the variable for
84 otherwise-ambiguous cases.
85
86 Punctuation
87
88 The following character sequences represent punctuation:
89
90 + += == != ( )
91 - -= < <= [ ]
92 ! = > >= { }
93 && || . ,
94
95 Grammar
96
97 The input tokens form a syntax tree following a context-free grammar:
98
99 File = StatementList .
100
101 Statement = Assignment | Call | Condition .
102 LValue = identifier | ArrayAccess | ScopeAccess .
103 Assignment = LValue AssignOp Expr .
104 Call = identifier "(" [ ExprList ] ")" [ Block ] .
105 Condition = "if" "(" Expr ")" Block
106 [ "else" ( Condition | Block ) ] .
107 Block = "{" StatementList "}" .
108 StatementList = { Statement } .
109
110 ArrayAccess = identifier "[" Expr "]" .
111 ScopeAccess = identifier "." identifier .
112 Expr = UnaryExpr | Expr BinaryOp Expr .
113 UnaryExpr = PrimaryExpr | UnaryOp UnaryExpr .
114 PrimaryExpr = identifier | integer | string | Call
115 | ArrayAccess | ScopeAccess | Block
116 | "(" Expr ")"
117 | "[" [ ExprList [ "," ] ] "]" .
118 ExprList = Expr { "," Expr } .
119
120 AssignOp = "=" | "+=" | "-=" .
121 UnaryOp = "!" .
122 BinaryOp = "+" | "-" // highest priority
123 | "<" | "<=" | ">" | ">="
124 | "==" | "!="
125 | "&&"
126 | "||" . // lowest priority
127
128 All binary operators are left-associative.
129
130 Types
131
132 The GN language is dynamically typed. The following types are used:
133
134 - Boolean: Uses the keywords "true" and "false". There is no implicit
135 conversion between booleans and integers.
136
137 - Integers: All numbers in GN are signed 64-bit integers.
138
139 - Strings: Strings are 8-bit with no enforced encoding. When a string is
140 used to interact with other systems with particular encodings (like the
141 Windows and Mac filesystems) it is assumed to be UTF-8. See "String
142 literals" above for more.
143
144 - Lists: Lists are arbitrary-length ordered lists of values. See "Lists"
145 below for more.
146
147 - Scopes: Scopes are like dictionaries that use variable names for keys. See
148 "Scopes" below for more.
149
150 Lists
151
152 Lists are created with [] and using commas to separate items:
153
154 mylist = [ 0, 1, 2, "some string" ]
155
156 A comma after the last item is optional. Lists are dereferenced using 0-based
157 indexing:
158
159 mylist[0] += 1
160 var = mylist[2]
161
162 Lists can be concatenated using the '+' and '+=' operators. Bare values can
163 not be concatenated with lists, to add a single item, it must be put into a
164 list of length one.
165
166 Items can be removed from lists using the '-' and '-=' operators. This will
167 remove all occurrences of every item in the right-hand list from the
168 left-hand list. It is an error to remove an item not in the list. This is to
169 prevent common typos and to detect dead code that is removing things that no
170 longer apply.
171
172 It is an error to use '=' to replace a nonempty list with another nonempty
173 list. This is to prevent accidentally overwriting data when in most cases
174 '+=' was intended. To overwrite a list on purpose, first assign it to the
175 empty list:
176
177 mylist = []
178 mylist = otherlist
179
180 Scopes
181
182 All execution happens in the context of a scope which holds the current state
183 (like variables). With the exception of loops and conditions, '{' introduces
184 a new scope.
185
186 Most scopes have a parent reference to the old scope. Variable reads
187 recursively search all parent scopes until the variable is found or there are
188 no more scopes. Variable writes always go into the current scope. This means
189 that after the closing '}' (again excepting loops and conditions), all local
190 variables will be restored to the previous values. This also means that "foo
191 = foo" can do useful work by copying a variable into the current scope that
192 was defined in a containing scope.
193
194 Scopes can be assigned to variables. Examples of such scopes are the
195 implicitly-created "invoker" when invoking a template (which refers to the
196 variables set by the invoking code), scopes created by functions like
197 exec_script, and scopes explicitly created like
198
199 empty_scope = {}
200 myvalues = {
201 foo = 21
202 bar = "something"
203 }
204
205 In the case of explicitly created scopes and scopes created by functions like
206 exec_script, there is no reference to the parent scope. Such scopes are fully
207 self-contained and do not "inherit" values from their defining scope.
208
209 Inside an explicit scope definition can be any GN code including conditionals
210 and function calls. After the close of the scope, it will contain all
211 variables explicitly set by the code contained inside it. After this, the
212 values can be read, modified, or added to:
213
214 myvalues.foo += 2
215 empty_scope.new_thing = [ 1, 2, 3 ]
216
217 Scope equality is defined as single-level scopes identical within the current
218 scope. That is, all values in the first scope must be present and identical
219 within the second, and vice versa. Note that this means inherited scopes are
220 always unequal by definition.
221 )*";
222
223 // Precedence constants.
224 //
225 // Currently all operators are left-associative so this list is sequential. To
226 // implement a right-assocative operators in a Pratt parser we would leave gaps
227 // in between the constants, and right-associative operators get a precedence
228 // of "<left-associated-precedence> - 1".
229 enum Precedence {
230 PRECEDENCE_ASSIGNMENT = 1, // Lowest precedence.
231 PRECEDENCE_OR = 2,
232 PRECEDENCE_AND = 3,
233 PRECEDENCE_EQUALITY = 4,
234 PRECEDENCE_RELATION = 5,
235 PRECEDENCE_SUM = 6,
236 PRECEDENCE_PREFIX = 7,
237 PRECEDENCE_CALL = 8,
238 PRECEDENCE_DOT = 9, // Highest precedence.
239 };
240
241 // The top-level for blocks/ifs is recursive descent, the expression parser is
242 // a Pratt parser. The basic idea there is to have the precedences (and
243 // associativities) encoded relative to each other and only parse up until you
244 // hit something of that precedence. There's a dispatch table in expressions_
245 // at the top of parser.cc that describes how each token dispatches if it's
246 // seen as either a prefix or infix operator, and if it's infix, what its
247 // precedence is.
248 //
249 // References:
250 // http://javascript.crockford.com/tdop/tdop.html
251 // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
252
253 // Indexed by Token::Type.
254 ParserHelper Parser::expressions_[] = {
255 {nullptr, nullptr, -1}, // INVALID
256 {&Parser::Literal, nullptr, -1}, // INTEGER
257 {&Parser::Literal, nullptr, -1}, // STRING
258 {&Parser::Literal, nullptr, -1}, // TRUE_TOKEN
259 {&Parser::Literal, nullptr, -1}, // FALSE_TOKEN
260 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL
261 {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS
262 {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS
263 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS
264 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS
265 {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL
266 {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL
267 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL
268 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL
269 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN
270 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN
271 {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND
272 {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR
273 {&Parser::Not, nullptr, -1}, // BANG
274 {nullptr, &Parser::DotOperator, PRECEDENCE_DOT}, // DOT
275 {&Parser::Group, nullptr, -1}, // LEFT_PAREN
276 {nullptr, nullptr, -1}, // RIGHT_PAREN
277 {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET
278 {nullptr, nullptr, -1}, // RIGHT_BRACKET
279 {&Parser::Block, nullptr, -1}, // LEFT_BRACE
280 {nullptr, nullptr, -1}, // RIGHT_BRACE
281 {nullptr, nullptr, -1}, // IF
282 {nullptr, nullptr, -1}, // ELSE
283 {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER
284 {nullptr, nullptr, -1}, // COMMA
285 {nullptr, nullptr, -1}, // UNCLASSIFIED_COMMENT
286 {nullptr, nullptr, -1}, // LINE_COMMENT
287 {nullptr, nullptr, -1}, // SUFFIX_COMMENT
288 {&Parser::BlockComment, nullptr, -1}, // BLOCK_COMMENT
289 };
290
Parser(const std::vector<Token> & tokens,Err * err)291 Parser::Parser(const std::vector<Token>& tokens, Err* err)
292 : invalid_token_(Location(), Token::INVALID, std::string_view()),
293 err_(err),
294 cur_(0) {
295 for (const auto& token : tokens) {
296 switch (token.type()) {
297 case Token::LINE_COMMENT:
298 line_comment_tokens_.push_back(token);
299 break;
300 case Token::SUFFIX_COMMENT:
301 suffix_comment_tokens_.push_back(token);
302 break;
303 default:
304 // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
305 // through the real parser.
306 tokens_.push_back(token);
307 break;
308 }
309 }
310 }
311
312 Parser::~Parser() = default;
313
314 // static
Parse(const std::vector<Token> & tokens,Err * err)315 std::unique_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
316 Err* err) {
317 Parser p(tokens, err);
318 return p.ParseFile();
319 }
320
321 // static
ParseExpression(const std::vector<Token> & tokens,Err * err)322 std::unique_ptr<ParseNode> Parser::ParseExpression(
323 const std::vector<Token>& tokens,
324 Err* err) {
325 Parser p(tokens, err);
326 std::unique_ptr<ParseNode> expr = p.ParseExpression();
327 if (!p.at_end() && !err->has_error()) {
328 *err = Err(p.cur_token(), "Trailing garbage");
329 return nullptr;
330 }
331 return expr;
332 }
333
334 // static
ParseValue(const std::vector<Token> & tokens,Err * err)335 std::unique_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
336 Err* err) {
337 for (const Token& token : tokens) {
338 switch (token.type()) {
339 case Token::INTEGER:
340 case Token::STRING:
341 case Token::TRUE_TOKEN:
342 case Token::FALSE_TOKEN:
343 case Token::LEFT_BRACKET:
344 case Token::RIGHT_BRACKET:
345 case Token::COMMA:
346 continue;
347 default:
348 *err = Err(token, "Invalid token in literal value");
349 return nullptr;
350 }
351 }
352
353 return ParseExpression(tokens, err);
354 }
355
IsAssignment(const ParseNode * node) const356 bool Parser::IsAssignment(const ParseNode* node) const {
357 return node && node->AsBinaryOp() &&
358 (node->AsBinaryOp()->op().type() == Token::EQUAL ||
359 node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
360 node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
361 }
362
IsStatementBreak(Token::Type token_type) const363 bool Parser::IsStatementBreak(Token::Type token_type) const {
364 switch (token_type) {
365 case Token::IDENTIFIER:
366 case Token::LEFT_BRACE:
367 case Token::RIGHT_BRACE:
368 case Token::IF:
369 case Token::ELSE:
370 return true;
371 default:
372 return false;
373 }
374 }
375
LookAhead(Token::Type type)376 bool Parser::LookAhead(Token::Type type) {
377 if (at_end())
378 return false;
379 return cur_token().type() == type;
380 }
381
Match(Token::Type type)382 bool Parser::Match(Token::Type type) {
383 if (!LookAhead(type))
384 return false;
385 Consume();
386 return true;
387 }
388
Consume(Token::Type type,const char * error_message)389 const Token& Parser::Consume(Token::Type type, const char* error_message) {
390 Token::Type types[1] = {type};
391 return Consume(types, 1, error_message);
392 }
393
Consume(Token::Type * types,size_t num_types,const char * error_message)394 const Token& Parser::Consume(Token::Type* types,
395 size_t num_types,
396 const char* error_message) {
397 if (has_error()) {
398 // Don't overwrite current error, but make progress through tokens so that
399 // a loop that's expecting a particular token will still terminate.
400 if (!at_end())
401 cur_++;
402 return invalid_token_;
403 }
404 if (at_end()) {
405 const char kEOFMsg[] = "I hit EOF instead.";
406 if (tokens_.empty())
407 *err_ = Err(Location(), error_message, kEOFMsg);
408 else
409 *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
410 return invalid_token_;
411 }
412
413 for (size_t i = 0; i < num_types; ++i) {
414 if (cur_token().type() == types[i])
415 return Consume();
416 }
417 *err_ = Err(cur_token(), error_message);
418 return invalid_token_;
419 }
420
Consume()421 const Token& Parser::Consume() {
422 return tokens_[cur_++];
423 }
424
ParseExpression()425 std::unique_ptr<ParseNode> Parser::ParseExpression() {
426 return ParseExpression(0);
427 }
428
ParseExpression(int precedence)429 std::unique_ptr<ParseNode> Parser::ParseExpression(int precedence) {
430 if (at_end())
431 return std::unique_ptr<ParseNode>();
432
433 const Token& token = Consume();
434 PrefixFunc prefix = expressions_[token.type()].prefix;
435
436 if (prefix == nullptr) {
437 *err_ = Err(token, "Unexpected token '" + std::string(token.value()) + "'");
438 return std::unique_ptr<ParseNode>();
439 }
440
441 std::unique_ptr<ParseNode> left = (this->*prefix)(token);
442 if (has_error())
443 return left;
444
445 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
446 precedence <= expressions_[cur_token().type()].precedence) {
447 const Token& next_token = Consume();
448 InfixFunc infix = expressions_[next_token.type()].infix;
449 if (infix == nullptr) {
450 *err_ = Err(next_token,
451 "Unexpected token '" + std::string(next_token.value()) + "'");
452 return std::unique_ptr<ParseNode>();
453 }
454 left = (this->*infix)(std::move(left), next_token);
455 if (has_error())
456 return std::unique_ptr<ParseNode>();
457 }
458
459 return left;
460 }
461
Block(const Token & token)462 std::unique_ptr<ParseNode> Parser::Block(const Token& token) {
463 // This entrypoint into ParseBlock means it's part of an expression and we
464 // always want the result.
465 return ParseBlock(token, BlockNode::RETURNS_SCOPE);
466 }
467
Literal(const Token & token)468 std::unique_ptr<ParseNode> Parser::Literal(const Token& token) {
469 return std::make_unique<LiteralNode>(token);
470 }
471
Name(const Token & token)472 std::unique_ptr<ParseNode> Parser::Name(const Token& token) {
473 return IdentifierOrCall(std::unique_ptr<ParseNode>(), token);
474 }
475
BlockComment(const Token & token)476 std::unique_ptr<ParseNode> Parser::BlockComment(const Token& token) {
477 std::unique_ptr<BlockCommentNode> comment =
478 std::make_unique<BlockCommentNode>();
479 comment->set_comment(token);
480 return comment;
481 }
482
Group(const Token & token)483 std::unique_ptr<ParseNode> Parser::Group(const Token& token) {
484 std::unique_ptr<ParseNode> expr = ParseExpression();
485 if (has_error())
486 return std::unique_ptr<ParseNode>();
487 Consume(Token::RIGHT_PAREN, "Expected ')'");
488 return expr;
489 }
490
Not(const Token & token)491 std::unique_ptr<ParseNode> Parser::Not(const Token& token) {
492 std::unique_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
493 if (has_error())
494 return std::unique_ptr<ParseNode>();
495 if (!expr) {
496 if (!has_error())
497 *err_ = Err(token, "Expected right-hand side for '!'.");
498 return std::unique_ptr<ParseNode>();
499 }
500 std::unique_ptr<UnaryOpNode> unary_op = std::make_unique<UnaryOpNode>();
501 unary_op->set_op(token);
502 unary_op->set_operand(std::move(expr));
503 return unary_op;
504 }
505
List(const Token & node)506 std::unique_ptr<ParseNode> Parser::List(const Token& node) {
507 std::unique_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
508 if (!has_error() && !at_end())
509 Consume(Token::RIGHT_BRACKET, "Expected ']'");
510 return list;
511 }
512
BinaryOperator(std::unique_ptr<ParseNode> left,const Token & token)513 std::unique_ptr<ParseNode> Parser::BinaryOperator(
514 std::unique_ptr<ParseNode> left,
515 const Token& token) {
516 std::unique_ptr<ParseNode> right =
517 ParseExpression(expressions_[token.type()].precedence + 1);
518 if (!right) {
519 if (!has_error()) {
520 *err_ = Err(token, "Expected right-hand side for '" +
521 std::string(token.value()) + "'");
522 }
523 return std::unique_ptr<ParseNode>();
524 }
525 std::unique_ptr<BinaryOpNode> binary_op = std::make_unique<BinaryOpNode>();
526 binary_op->set_op(token);
527 binary_op->set_left(std::move(left));
528 binary_op->set_right(std::move(right));
529 return binary_op;
530 }
531
IdentifierOrCall(std::unique_ptr<ParseNode> left,const Token & token)532 std::unique_ptr<ParseNode> Parser::IdentifierOrCall(
533 std::unique_ptr<ParseNode> left,
534 const Token& token) {
535 std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
536 list->set_begin_token(token);
537 list->set_end(std::make_unique<EndNode>(token));
538 std::unique_ptr<BlockNode> block;
539 bool has_arg = false;
540 if (LookAhead(Token::LEFT_PAREN)) {
541 const Token& start_token = Consume();
542 // Parsing a function call.
543 has_arg = true;
544 if (Match(Token::RIGHT_PAREN)) {
545 // Nothing, just an empty call.
546 } else {
547 list = ParseList(start_token, Token::RIGHT_PAREN, false);
548 if (has_error())
549 return std::unique_ptr<ParseNode>();
550 Consume(Token::RIGHT_PAREN, "Expected ')' after call");
551 }
552 // Optionally with a scope.
553 if (LookAhead(Token::LEFT_BRACE)) {
554 block = ParseBlock(Consume(), BlockNode::DISCARDS_RESULT);
555 if (has_error())
556 return std::unique_ptr<ParseNode>();
557 }
558 }
559
560 if (!left && !has_arg) {
561 // Not a function call, just a standalone identifier.
562 return std::make_unique<IdentifierNode>(token);
563 }
564 std::unique_ptr<FunctionCallNode> func_call =
565 std::make_unique<FunctionCallNode>();
566 func_call->set_function(token);
567 func_call->set_args(std::move(list));
568 if (block)
569 func_call->set_block(std::move(block));
570 return func_call;
571 }
572
Assignment(std::unique_ptr<ParseNode> left,const Token & token)573 std::unique_ptr<ParseNode> Parser::Assignment(std::unique_ptr<ParseNode> left,
574 const Token& token) {
575 if (left->AsIdentifier() == nullptr && left->AsAccessor() == nullptr) {
576 *err_ = Err(left.get(),
577 "The left-hand side of an assignment must be an identifier, "
578 "scope access, or array access.");
579 return std::unique_ptr<ParseNode>();
580 }
581 std::unique_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
582 if (!value) {
583 if (!has_error())
584 *err_ = Err(token, "Expected right-hand side for assignment.");
585 return std::unique_ptr<ParseNode>();
586 }
587 std::unique_ptr<BinaryOpNode> assign = std::make_unique<BinaryOpNode>();
588 assign->set_op(token);
589 assign->set_left(std::move(left));
590 assign->set_right(std::move(value));
591 return assign;
592 }
593
Subscript(std::unique_ptr<ParseNode> left,const Token & token)594 std::unique_ptr<ParseNode> Parser::Subscript(std::unique_ptr<ParseNode> left,
595 const Token& token) {
596 // TODO: Maybe support more complex expressions like a[0][0]. This would
597 // require work on the evaluator too.
598 if (left->AsIdentifier() == nullptr) {
599 *err_ = Err(
600 left.get(), "May only subscript identifiers.",
601 "The thing on the left hand side of the [] must be an identifier\n"
602 "and not an expression. If you need this, you'll have to assign the\n"
603 "value to a temporary before subscripting. Sorry.");
604 return std::unique_ptr<ParseNode>();
605 }
606 std::unique_ptr<ParseNode> value = ParseExpression();
607 Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
608 std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
609 accessor->set_base(left->AsIdentifier()->value());
610 accessor->set_subscript(std::move(value));
611 return accessor;
612 }
613
DotOperator(std::unique_ptr<ParseNode> left,const Token & token)614 std::unique_ptr<ParseNode> Parser::DotOperator(std::unique_ptr<ParseNode> left,
615 const Token& token) {
616 if (left->AsIdentifier() == nullptr) {
617 *err_ = Err(
618 left.get(), "May only use \".\" for identifiers.",
619 "The thing on the left hand side of the dot must be an identifier\n"
620 "and not an expression. If you need this, you'll have to assign the\n"
621 "value to a temporary first. Sorry.");
622 return std::unique_ptr<ParseNode>();
623 }
624
625 std::unique_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
626 if (!right || !right->AsIdentifier()) {
627 *err_ = Err(
628 token, "Expected identifier for right-hand-side of \".\"",
629 "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
630 return std::unique_ptr<ParseNode>();
631 }
632
633 std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
634 accessor->set_base(left->AsIdentifier()->value());
635 accessor->set_member(std::unique_ptr<IdentifierNode>(
636 static_cast<IdentifierNode*>(right.release())));
637 return accessor;
638 }
639
640 // Does not Consume the start or end token.
ParseList(const Token & start_token,Token::Type stop_before,bool allow_trailing_comma)641 std::unique_ptr<ListNode> Parser::ParseList(const Token& start_token,
642 Token::Type stop_before,
643 bool allow_trailing_comma) {
644 std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
645 list->set_begin_token(start_token);
646 bool just_got_comma = false;
647 bool first_time = true;
648 while (!LookAhead(stop_before)) {
649 if (!first_time) {
650 if (!just_got_comma) {
651 // Require commas separate things in lists.
652 *err_ = Err(cur_token(), "Expected comma between items.");
653 return std::unique_ptr<ListNode>();
654 }
655 }
656 first_time = false;
657
658 // Why _OR? We're parsing things that are higher precedence than the ,
659 // that separates the items of the list. , should appear lower than
660 // boolean expressions (the lowest of which is OR), but above assignments.
661 list->append_item(ParseExpression(PRECEDENCE_OR));
662 if (has_error())
663 return std::unique_ptr<ListNode>();
664 if (at_end()) {
665 *err_ =
666 Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
667 return std::unique_ptr<ListNode>();
668 }
669 if (list->contents().back()->AsBlockComment()) {
670 // If there was a comment inside the list, we don't need a comma to the
671 // next item, so pretend we got one, if we're expecting one.
672 just_got_comma = allow_trailing_comma;
673 } else {
674 just_got_comma = Match(Token::COMMA);
675 }
676 }
677 if (just_got_comma && !allow_trailing_comma) {
678 *err_ = Err(cur_token(), "Trailing comma");
679 return std::unique_ptr<ListNode>();
680 }
681 list->set_end(std::make_unique<EndNode>(cur_token()));
682 return list;
683 }
684
ParseFile()685 std::unique_ptr<ParseNode> Parser::ParseFile() {
686 std::unique_ptr<BlockNode> file =
687 std::make_unique<BlockNode>(BlockNode::DISCARDS_RESULT);
688 for (;;) {
689 if (at_end())
690 break;
691 std::unique_ptr<ParseNode> statement = ParseStatement();
692 if (!statement)
693 break;
694 file->append_statement(std::move(statement));
695 }
696 if (!at_end() && !has_error())
697 *err_ = Err(cur_token(), "Unexpected here, should be newline.");
698 if (has_error())
699 return std::unique_ptr<ParseNode>();
700
701 // TODO(scottmg): If this is measurably expensive, it could be done only
702 // when necessary (when reformatting, or during tests). Comments are
703 // separate from the parse tree at this point, so downstream code can remain
704 // ignorant of them.
705 AssignComments(file.get());
706
707 return file;
708 }
709
ParseStatement()710 std::unique_ptr<ParseNode> Parser::ParseStatement() {
711 if (LookAhead(Token::IF)) {
712 return ParseCondition();
713 } else if (LookAhead(Token::BLOCK_COMMENT)) {
714 return BlockComment(Consume());
715 } else {
716 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
717 // to allow "pointless" expressions and return ParseExpression() directly.
718 std::unique_ptr<ParseNode> stmt = ParseExpression();
719 if (stmt) {
720 if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
721 return stmt;
722 }
723 if (!has_error()) {
724 const Token& token = cur_or_last_token();
725 *err_ = Err(token, "Expecting assignment or function call.");
726 }
727 return std::unique_ptr<ParseNode>();
728 }
729 }
730
ParseBlock(const Token & begin_brace,BlockNode::ResultMode result_mode)731 std::unique_ptr<BlockNode> Parser::ParseBlock(
732 const Token& begin_brace,
733 BlockNode::ResultMode result_mode) {
734 if (has_error())
735 return std::unique_ptr<BlockNode>();
736 std::unique_ptr<BlockNode> block = std::make_unique<BlockNode>(result_mode);
737 block->set_begin_token(begin_brace);
738
739 for (;;) {
740 if (LookAhead(Token::RIGHT_BRACE)) {
741 block->set_end(std::make_unique<EndNode>(Consume()));
742 break;
743 }
744
745 std::unique_ptr<ParseNode> statement = ParseStatement();
746 if (!statement)
747 return std::unique_ptr<BlockNode>();
748 block->append_statement(std::move(statement));
749 }
750 return block;
751 }
752
ParseCondition()753 std::unique_ptr<ParseNode> Parser::ParseCondition() {
754 std::unique_ptr<ConditionNode> condition = std::make_unique<ConditionNode>();
755 condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
756 Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
757 condition->set_condition(ParseExpression());
758 if (IsAssignment(condition->condition()))
759 *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
760 Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
761 condition->set_if_true(ParseBlock(
762 Consume(Token::LEFT_BRACE, "Expected '{' to start 'if' block."),
763 BlockNode::DISCARDS_RESULT));
764 if (Match(Token::ELSE)) {
765 if (LookAhead(Token::LEFT_BRACE)) {
766 condition->set_if_false(
767 ParseBlock(Consume(), BlockNode::DISCARDS_RESULT));
768 } else if (LookAhead(Token::IF)) {
769 condition->set_if_false(ParseStatement());
770 } else {
771 *err_ = Err(cur_or_last_token(), "Expected '{' or 'if' after 'else'.");
772 return std::unique_ptr<ParseNode>();
773 }
774 }
775 if (has_error())
776 return std::unique_ptr<ParseNode>();
777 return condition;
778 }
779
TraverseOrder(const ParseNode * root,std::vector<const ParseNode * > * pre,std::vector<const ParseNode * > * post)780 void Parser::TraverseOrder(const ParseNode* root,
781 std::vector<const ParseNode*>* pre,
782 std::vector<const ParseNode*>* post) {
783 if (root) {
784 pre->push_back(root);
785
786 if (const AccessorNode* accessor = root->AsAccessor()) {
787 TraverseOrder(accessor->subscript(), pre, post);
788 TraverseOrder(accessor->member(), pre, post);
789 } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
790 TraverseOrder(binop->left(), pre, post);
791 TraverseOrder(binop->right(), pre, post);
792 } else if (const BlockNode* block = root->AsBlock()) {
793 for (const auto& statement : block->statements())
794 TraverseOrder(statement.get(), pre, post);
795 TraverseOrder(block->End(), pre, post);
796 } else if (const ConditionNode* condition = root->AsCondition()) {
797 TraverseOrder(condition->condition(), pre, post);
798 TraverseOrder(condition->if_true(), pre, post);
799 TraverseOrder(condition->if_false(), pre, post);
800 } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
801 TraverseOrder(func_call->args(), pre, post);
802 TraverseOrder(func_call->block(), pre, post);
803 } else if (root->AsIdentifier()) {
804 // Nothing.
805 } else if (const ListNode* list = root->AsList()) {
806 for (const auto& node : list->contents())
807 TraverseOrder(node.get(), pre, post);
808 TraverseOrder(list->End(), pre, post);
809 } else if (root->AsLiteral()) {
810 // Nothing.
811 } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
812 TraverseOrder(unaryop->operand(), pre, post);
813 } else if (root->AsBlockComment()) {
814 // Nothing.
815 } else if (root->AsEnd()) {
816 // Nothing.
817 } else {
818 CHECK(false) << "Unhandled case in TraverseOrder.";
819 }
820
821 post->push_back(root);
822 }
823 }
824
AssignComments(ParseNode * file)825 void Parser::AssignComments(ParseNode* file) {
826 // Start by generating a pre- and post- order traversal of the tree so we
827 // can determine what's before and after comments.
828 std::vector<const ParseNode*> pre;
829 std::vector<const ParseNode*> post;
830 TraverseOrder(file, &pre, &post);
831
832 // Assign line comments to syntax immediately following.
833 int cur_comment = 0;
834 for (auto* node : pre) {
835 if (node->GetRange().is_null()) {
836 CHECK_EQ(node, file) << "Only expected on top file node";
837 continue;
838 }
839 const Location start = node->GetRange().begin();
840 while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
841 if (start >= line_comment_tokens_[cur_comment].location()) {
842 const_cast<ParseNode*>(node)->comments_mutable()->append_before(
843 line_comment_tokens_[cur_comment]);
844 ++cur_comment;
845 } else {
846 break;
847 }
848 }
849 }
850
851 // Remaining line comments go at end of file.
852 for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
853 ++cur_comment)
854 file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
855
856 // Assign suffix to syntax immediately before.
857 cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
858 for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
859 i != post.rend(); ++i) {
860 // Don't assign suffix comments to the function, list, or block, but instead
861 // to the last thing inside.
862 if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
863 continue;
864
865 Location start = (*i)->GetRange().begin();
866 Location end = (*i)->GetRange().end();
867
868 // Don't assign suffix comments to something that starts on an earlier
869 // line, so that in:
870 //
871 // sources = [ "a",
872 // "b" ] # comment
873 //
874 // it's attached to "b", not sources = [ ... ].
875 if (start.line_number() != end.line_number())
876 continue;
877
878 while (cur_comment >= 0) {
879 if (end <= suffix_comment_tokens_[cur_comment].location()) {
880 const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
881 suffix_comment_tokens_[cur_comment]);
882 --cur_comment;
883 } else {
884 break;
885 }
886 }
887
888 // Suffix comments were assigned in reverse, so if there were multiple on
889 // the same node, they need to be reversed.
890 if ((*i)->comments() && !(*i)->comments()->suffix().empty())
891 const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
892 }
893 }
894
IndentFor(int value)895 std::string IndentFor(int value) {
896 return std::string(value, ' ');
897 }
898
RenderToText(const base::Value & node,int indent_level,std::ostringstream & os)899 void RenderToText(const base::Value& node,
900 int indent_level,
901 std::ostringstream& os) {
902 const base::Value* child = node.FindKey(std::string("child"));
903 std::string node_type(node.FindKey("type")->GetString());
904 if (node_type == "ACCESSOR") {
905 // AccessorNode is a bit special, in that it holds a Token, not a ParseNode
906 // for the base.
907 os << IndentFor(indent_level) << node_type << std::endl;
908 os << IndentFor(indent_level + 1) << node.FindKey("value")->GetString()
909 << std::endl;
910 } else {
911 os << IndentFor(indent_level) << node_type;
912 if (node.FindKey("value")) {
913 os << "(" << node.FindKey("value")->GetString() << ")";
914 }
915 os << std::endl;
916 }
917 if (node.FindKey(kJsonBeforeComment)) {
918 for (auto& v : node.FindKey(kJsonBeforeComment)->GetList()) {
919 os << IndentFor(indent_level + 1) << "+BEFORE_COMMENT(\"" << v.GetString()
920 << "\")\n";
921 }
922 }
923 if (node.FindKey(kJsonSuffixComment)) {
924 for (auto& v : node.FindKey(kJsonSuffixComment)->GetList()) {
925 os << IndentFor(indent_level + 1) << "+SUFFIX_COMMENT(\"" << v.GetString()
926 << "\")\n";
927 }
928 }
929 if (node.FindKey(kJsonAfterComment)) {
930 for (auto& v : node.FindKey(kJsonAfterComment)->GetList()) {
931 os << IndentFor(indent_level + 1) << "+AFTER_COMMENT(\"" << v.GetString()
932 << "\")\n";
933 }
934 }
935 if (child) {
936 for (const base::Value& n : child->GetList()) {
937 RenderToText(n, indent_level + 1, os);
938 }
939 }
940 const base::Value* end = node.FindKey("end");
941 if (end &&
942 (end->FindKey(kJsonBeforeComment) || end->FindKey(kJsonSuffixComment) ||
943 end->FindKey(kJsonAfterComment))) {
944 RenderToText(*end, indent_level + 1, os);
945 }
946 }
947