1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "gn/parser.h"
6
7 #include <memory>
8 #include <utility>
9
10 #include "base/logging.h"
11 #include "gn/functions.h"
12 #include "gn/operators.h"
13 #include "gn/token.h"
14
15 const char kGrammar_Help[] =
16 R"*(Language and grammar for GN build files
17
18 Tokens
19
20 GN build files are read as sequences of tokens. While splitting the file
21 into tokens, the next token is the longest sequence of characters that form a
22 valid token.
23
24 White space and comments
25
26 White space is comprised of spaces (U+0020), horizontal tabs (U+0009),
27 carriage returns (U+000D), and newlines (U+000A).
28
29 Comments start at the character "#" and stop at the next newline.
30
31 White space and comments are ignored except that they may separate tokens
32 that would otherwise combine into a single token.
33
34 Identifiers
35
36 Identifiers name variables and functions.
37
38 identifier = letter { letter | digit } .
39 letter = "A" ... "Z" | "a" ... "z" | "_" .
40 digit = "0" ... "9" .
41
42 Keywords
43
44 The following keywords are reserved and may not be used as identifiers:
45
46 else false if true
47
48 Integer literals
49
50 An integer literal represents a decimal integer value.
51
52 integer = [ "-" ] digit { digit } .
53
54 Leading zeros and negative zero are disallowed.
55
56 String literals
57
58 A string literal represents a string value consisting of the quoted
59 characters with possible escape sequences and variable expansions.
60
61 string = `"` { char | escape | expansion } `"` .
62 escape = `\` ( "$" | `"` | char ) .
63 BracketExpansion = "{" ( identifier | ArrayAccess | ScopeAccess "
64 ") "}" .
65 Hex = "0x" [0-9A-Fa-f][0-9A-Fa-f]
66 expansion = "$" ( identifier | BracketExpansion | Hex ) .
67 char = /* any character except "$", `"`, or newline "
68 "*/ .
69
70 After a backslash, certain sequences represent special characters:
71
72 \" U+0022 quotation mark
73 \$ U+0024 dollar sign
74 \\ U+005C backslash
75
76 All other backslashes represent themselves.
77
78 To insert an arbitrary byte value, use $0xFF. For example, to insert a
79 newline character: "Line one$0x0ALine two".
80
81 An expansion will evaluate the variable following the '$' and insert a
82 stringified version of it into the result. For example, to concat two path
83 components with a slash separating them:
84 "$var_one/$var_two"
85 Use the "${var_one}" format to be explicitly deliniate the variable for
86 otherwise-ambiguous cases.
87
88 Punctuation
89
90 The following character sequences represent punctuation:
91
92 + += == != ( )
93 - -= < <= [ ]
94 ! = > >= { }
95 && || . ,
96
97 Grammar
98
99 The input tokens form a syntax tree following a context-free grammar:
100
101 File = StatementList .
102
103 Statement = Assignment | Call | Condition .
104 LValue = identifier | ArrayAccess | ScopeAccess .
105 Assignment = LValue AssignOp Expr .
106 Call = identifier "(" [ ExprList ] ")" [ Block ] .
107 Condition = "if" "(" Expr ")" Block
108 [ "else" ( Condition | Block ) ] .
109 Block = "{" StatementList "}" .
110 StatementList = { Statement } .
111
112 ArrayAccess = identifier "[" Expr "]" .
113 ScopeAccess = identifier "." identifier .
114 Expr = UnaryExpr | Expr BinaryOp Expr .
115 UnaryExpr = PrimaryExpr | UnaryOp UnaryExpr .
116 PrimaryExpr = identifier | integer | string | Call
117 | ArrayAccess | ScopeAccess | Block
118 | "(" Expr ")"
119 | "[" [ ExprList [ "," ] ] "]" .
120 ExprList = Expr { "," Expr } .
121
122 AssignOp = "=" | "+=" | "-=" .
123 UnaryOp = "!" .
124 BinaryOp = "+" | "-" // highest priority
125 | "<" | "<=" | ">" | ">="
126 | "==" | "!="
127 | "&&"
128 | "||" . // lowest priority
129
130 All binary operators are left-associative.
131
132 Types
133
134 The GN language is dynamically typed. The following types are used:
135
136 - Boolean: Uses the keywords "true" and "false". There is no implicit
137 conversion between booleans and integers.
138
139 - Integers: All numbers in GN are signed 64-bit integers.
140
141 - Strings: Strings are 8-bit with no enforced encoding. When a string is
142 used to interact with other systems with particular encodings (like the
143 Windows and Mac filesystems) it is assumed to be UTF-8. See "String
144 literals" above for more.
145
146 - Lists: Lists are arbitrary-length ordered lists of values. See "Lists"
147 below for more.
148
149 - Scopes: Scopes are like dictionaries that use variable names for keys. See
150 "Scopes" below for more.
151
152 Lists
153
154 Lists are created with [] and using commas to separate items:
155
156 mylist = [ 0, 1, 2, "some string" ]
157
158 A comma after the last item is optional. Lists are dereferenced using 0-based
159 indexing:
160
161 mylist[0] += 1
162 var = mylist[2]
163
164 Lists can be concatenated using the '+' and '+=' operators. Bare values can
165 not be concatenated with lists, to add a single item, it must be put into a
166 list of length one.
167
168 Items can be removed from lists using the '-' and '-=' operators. This will
169 remove all occurrences of every item in the right-hand list from the
170 left-hand list. It is an error to remove an item not in the list. This is to
171 prevent common typos and to detect dead code that is removing things that no
172 longer apply.
173
174 It is an error to use '=' to replace a nonempty list with another nonempty
175 list. This is to prevent accidentally overwriting data when in most cases
176 '+=' was intended. To overwrite a list on purpose, first assign it to the
177 empty list:
178
179 mylist = []
180 mylist = otherlist
181
182 When assigning to a list named 'sources' using '=' or '+=', list items may be
183 automatically filtered out. See "gn help set_sources_assignment_filter" for
184 more.
185
186 Scopes
187
188 All execution happens in the context of a scope which holds the current state
189 (like variables). With the exception of loops and conditions, '{' introduces
190 a new scope that has a parent reference to the old scope.
191
192 Variable reads recursively search all nested scopes until the variable is
193 found or there are no more scopes. Variable writes always go into the current
194 scope. This means that after the closing '}' (again excepting loops and
195 conditions), all local variables will be restored to the previous values.
196 This also means that "foo = foo" can do useful work by copying a variable
197 into the current scope that was defined in a containing scope.
198
199 Scopes can also be assigned to variables. Such scopes can be created by
200 functions like exec_script, when invoking a template (the template code
201 refers to the variables set by the invoking code by the implicitly-created
202 "invoker" scope), or explicitly like:
203
204 empty_scope = {}
205 myvalues = {
206 foo = 21
207 bar = "something"
208 }
209
210 Inside such a scope definition can be any GN code including conditionals and
211 function calls. After the close of the scope, it will contain all variables
212 explicitly set by the code contained inside it. After this, the values can be
213 read, modified, or added to:
214
215 myvalues.foo += 2
216 empty_scope.new_thing = [ 1, 2, 3 ]
217
218 Scope equality is defined as single-level scopes identical within the current
219 scope. That is, all values in the first scope must be present and identical
220 within the second, and vice versa. Note that this means inherited scopes are
221 always unequal by definition.
222 )*";
223
224 // Precedence constants.
225 //
226 // Currently all operators are left-associative so this list is sequential. To
227 // implement a right-assocative operators in a Pratt parser we would leave gaps
228 // in between the constants, and right-associative operators get a precedence
229 // of "<left-associated-precedence> - 1".
230 enum Precedence {
231 PRECEDENCE_ASSIGNMENT = 1, // Lowest precedence.
232 PRECEDENCE_OR = 2,
233 PRECEDENCE_AND = 3,
234 PRECEDENCE_EQUALITY = 4,
235 PRECEDENCE_RELATION = 5,
236 PRECEDENCE_SUM = 6,
237 PRECEDENCE_PREFIX = 7,
238 PRECEDENCE_CALL = 8,
239 PRECEDENCE_DOT = 9, // Highest precedence.
240 };
241
242 // The top-level for blocks/ifs is recursive descent, the expression parser is
243 // a Pratt parser. The basic idea there is to have the precedences (and
244 // associativities) encoded relative to each other and only parse up until you
245 // hit something of that precedence. There's a dispatch table in expressions_
246 // at the top of parser.cc that describes how each token dispatches if it's
247 // seen as either a prefix or infix operator, and if it's infix, what its
248 // precedence is.
249 //
250 // References:
251 // http://javascript.crockford.com/tdop/tdop.html
252 // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
253
254 // Indexed by Token::Type.
255 ParserHelper Parser::expressions_[] = {
256 {nullptr, nullptr, -1}, // INVALID
257 {&Parser::Literal, nullptr, -1}, // INTEGER
258 {&Parser::Literal, nullptr, -1}, // STRING
259 {&Parser::Literal, nullptr, -1}, // TRUE_TOKEN
260 {&Parser::Literal, nullptr, -1}, // FALSE_TOKEN
261 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL
262 {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS
263 {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS
264 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS
265 {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS
266 {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL
267 {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL
268 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL
269 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL
270 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN
271 {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN
272 {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND
273 {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR
274 {&Parser::Not, nullptr, -1}, // BANG
275 {nullptr, &Parser::DotOperator, PRECEDENCE_DOT}, // DOT
276 {&Parser::Group, nullptr, -1}, // LEFT_PAREN
277 {nullptr, nullptr, -1}, // RIGHT_PAREN
278 {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET
279 {nullptr, nullptr, -1}, // RIGHT_BRACKET
280 {&Parser::Block, nullptr, -1}, // LEFT_BRACE
281 {nullptr, nullptr, -1}, // RIGHT_BRACE
282 {nullptr, nullptr, -1}, // IF
283 {nullptr, nullptr, -1}, // ELSE
284 {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER
285 {nullptr, nullptr, -1}, // COMMA
286 {nullptr, nullptr, -1}, // UNCLASSIFIED_COMMENT
287 {nullptr, nullptr, -1}, // LINE_COMMENT
288 {nullptr, nullptr, -1}, // SUFFIX_COMMENT
289 {&Parser::BlockComment, nullptr, -1}, // BLOCK_COMMENT
290 };
291
Parser(const std::vector<Token> & tokens,Err * err)292 Parser::Parser(const std::vector<Token>& tokens, Err* err)
293 : invalid_token_(Location(), Token::INVALID, std::string_view()),
294 err_(err),
295 cur_(0) {
296 for (const auto& token : tokens) {
297 switch (token.type()) {
298 case Token::LINE_COMMENT:
299 line_comment_tokens_.push_back(token);
300 break;
301 case Token::SUFFIX_COMMENT:
302 suffix_comment_tokens_.push_back(token);
303 break;
304 default:
305 // Note that BLOCK_COMMENTs (top-level standalone comments) are passed
306 // through the real parser.
307 tokens_.push_back(token);
308 break;
309 }
310 }
311 }
312
313 Parser::~Parser() = default;
314
315 // static
Parse(const std::vector<Token> & tokens,Err * err)316 std::unique_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens,
317 Err* err) {
318 Parser p(tokens, err);
319 return p.ParseFile();
320 }
321
322 // static
ParseExpression(const std::vector<Token> & tokens,Err * err)323 std::unique_ptr<ParseNode> Parser::ParseExpression(
324 const std::vector<Token>& tokens,
325 Err* err) {
326 Parser p(tokens, err);
327 std::unique_ptr<ParseNode> expr = p.ParseExpression();
328 if (!p.at_end() && !err->has_error()) {
329 *err = Err(p.cur_token(), "Trailing garbage");
330 return nullptr;
331 }
332 return expr;
333 }
334
335 // static
ParseValue(const std::vector<Token> & tokens,Err * err)336 std::unique_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens,
337 Err* err) {
338 for (const Token& token : tokens) {
339 switch (token.type()) {
340 case Token::INTEGER:
341 case Token::STRING:
342 case Token::TRUE_TOKEN:
343 case Token::FALSE_TOKEN:
344 case Token::LEFT_BRACKET:
345 case Token::RIGHT_BRACKET:
346 case Token::COMMA:
347 continue;
348 default:
349 *err = Err(token, "Invalid token in literal value");
350 return nullptr;
351 }
352 }
353
354 return ParseExpression(tokens, err);
355 }
356
IsAssignment(const ParseNode * node) const357 bool Parser::IsAssignment(const ParseNode* node) const {
358 return node && node->AsBinaryOp() &&
359 (node->AsBinaryOp()->op().type() == Token::EQUAL ||
360 node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS ||
361 node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS);
362 }
363
IsStatementBreak(Token::Type token_type) const364 bool Parser::IsStatementBreak(Token::Type token_type) const {
365 switch (token_type) {
366 case Token::IDENTIFIER:
367 case Token::LEFT_BRACE:
368 case Token::RIGHT_BRACE:
369 case Token::IF:
370 case Token::ELSE:
371 return true;
372 default:
373 return false;
374 }
375 }
376
LookAhead(Token::Type type)377 bool Parser::LookAhead(Token::Type type) {
378 if (at_end())
379 return false;
380 return cur_token().type() == type;
381 }
382
Match(Token::Type type)383 bool Parser::Match(Token::Type type) {
384 if (!LookAhead(type))
385 return false;
386 Consume();
387 return true;
388 }
389
Consume(Token::Type type,const char * error_message)390 const Token& Parser::Consume(Token::Type type, const char* error_message) {
391 Token::Type types[1] = {type};
392 return Consume(types, 1, error_message);
393 }
394
Consume(Token::Type * types,size_t num_types,const char * error_message)395 const Token& Parser::Consume(Token::Type* types,
396 size_t num_types,
397 const char* error_message) {
398 if (has_error()) {
399 // Don't overwrite current error, but make progress through tokens so that
400 // a loop that's expecting a particular token will still terminate.
401 if (!at_end())
402 cur_++;
403 return invalid_token_;
404 }
405 if (at_end()) {
406 const char kEOFMsg[] = "I hit EOF instead.";
407 if (tokens_.empty())
408 *err_ = Err(Location(), error_message, kEOFMsg);
409 else
410 *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg);
411 return invalid_token_;
412 }
413
414 for (size_t i = 0; i < num_types; ++i) {
415 if (cur_token().type() == types[i])
416 return Consume();
417 }
418 *err_ = Err(cur_token(), error_message);
419 return invalid_token_;
420 }
421
Consume()422 const Token& Parser::Consume() {
423 return tokens_[cur_++];
424 }
425
ParseExpression()426 std::unique_ptr<ParseNode> Parser::ParseExpression() {
427 return ParseExpression(0);
428 }
429
ParseExpression(int precedence)430 std::unique_ptr<ParseNode> Parser::ParseExpression(int precedence) {
431 if (at_end())
432 return std::unique_ptr<ParseNode>();
433
434 const Token& token = Consume();
435 PrefixFunc prefix = expressions_[token.type()].prefix;
436
437 if (prefix == nullptr) {
438 *err_ = Err(token, "Unexpected token '" + std::string(token.value()) + "'");
439 return std::unique_ptr<ParseNode>();
440 }
441
442 std::unique_ptr<ParseNode> left = (this->*prefix)(token);
443 if (has_error())
444 return left;
445
446 while (!at_end() && !IsStatementBreak(cur_token().type()) &&
447 precedence <= expressions_[cur_token().type()].precedence) {
448 const Token& next_token = Consume();
449 InfixFunc infix = expressions_[next_token.type()].infix;
450 if (infix == nullptr) {
451 *err_ = Err(next_token,
452 "Unexpected token '" + std::string(next_token.value()) + "'");
453 return std::unique_ptr<ParseNode>();
454 }
455 left = (this->*infix)(std::move(left), next_token);
456 if (has_error())
457 return std::unique_ptr<ParseNode>();
458 }
459
460 return left;
461 }
462
Block(const Token & token)463 std::unique_ptr<ParseNode> Parser::Block(const Token& token) {
464 // This entrypoint into ParseBlock means it's part of an expression and we
465 // always want the result.
466 return ParseBlock(token, BlockNode::RETURNS_SCOPE);
467 }
468
Literal(const Token & token)469 std::unique_ptr<ParseNode> Parser::Literal(const Token& token) {
470 return std::make_unique<LiteralNode>(token);
471 }
472
Name(const Token & token)473 std::unique_ptr<ParseNode> Parser::Name(const Token& token) {
474 return IdentifierOrCall(std::unique_ptr<ParseNode>(), token);
475 }
476
BlockComment(const Token & token)477 std::unique_ptr<ParseNode> Parser::BlockComment(const Token& token) {
478 std::unique_ptr<BlockCommentNode> comment =
479 std::make_unique<BlockCommentNode>();
480 comment->set_comment(token);
481 return std::move(comment);
482 }
483
Group(const Token & token)484 std::unique_ptr<ParseNode> Parser::Group(const Token& token) {
485 std::unique_ptr<ParseNode> expr = ParseExpression();
486 if (has_error())
487 return std::unique_ptr<ParseNode>();
488 Consume(Token::RIGHT_PAREN, "Expected ')'");
489 return expr;
490 }
491
Not(const Token & token)492 std::unique_ptr<ParseNode> Parser::Not(const Token& token) {
493 std::unique_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1);
494 if (has_error())
495 return std::unique_ptr<ParseNode>();
496 if (!expr) {
497 if (!has_error())
498 *err_ = Err(token, "Expected right-hand side for '!'.");
499 return std::unique_ptr<ParseNode>();
500 }
501 std::unique_ptr<UnaryOpNode> unary_op = std::make_unique<UnaryOpNode>();
502 unary_op->set_op(token);
503 unary_op->set_operand(std::move(expr));
504 return std::move(unary_op);
505 }
506
List(const Token & node)507 std::unique_ptr<ParseNode> Parser::List(const Token& node) {
508 std::unique_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true));
509 if (!has_error() && !at_end())
510 Consume(Token::RIGHT_BRACKET, "Expected ']'");
511 return list;
512 }
513
BinaryOperator(std::unique_ptr<ParseNode> left,const Token & token)514 std::unique_ptr<ParseNode> Parser::BinaryOperator(
515 std::unique_ptr<ParseNode> left,
516 const Token& token) {
517 std::unique_ptr<ParseNode> right =
518 ParseExpression(expressions_[token.type()].precedence + 1);
519 if (!right) {
520 if (!has_error()) {
521 *err_ = Err(token, "Expected right-hand side for '" +
522 std::string(token.value()) + "'");
523 }
524 return std::unique_ptr<ParseNode>();
525 }
526 std::unique_ptr<BinaryOpNode> binary_op = std::make_unique<BinaryOpNode>();
527 binary_op->set_op(token);
528 binary_op->set_left(std::move(left));
529 binary_op->set_right(std::move(right));
530 return std::move(binary_op);
531 }
532
IdentifierOrCall(std::unique_ptr<ParseNode> left,const Token & token)533 std::unique_ptr<ParseNode> Parser::IdentifierOrCall(
534 std::unique_ptr<ParseNode> left,
535 const Token& token) {
536 std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
537 list->set_begin_token(token);
538 list->set_end(std::make_unique<EndNode>(token));
539 std::unique_ptr<BlockNode> block;
540 bool has_arg = false;
541 if (LookAhead(Token::LEFT_PAREN)) {
542 const Token& start_token = Consume();
543 // Parsing a function call.
544 has_arg = true;
545 if (Match(Token::RIGHT_PAREN)) {
546 // Nothing, just an empty call.
547 } else {
548 list = ParseList(start_token, Token::RIGHT_PAREN, false);
549 if (has_error())
550 return std::unique_ptr<ParseNode>();
551 Consume(Token::RIGHT_PAREN, "Expected ')' after call");
552 }
553 // Optionally with a scope.
554 if (LookAhead(Token::LEFT_BRACE)) {
555 block = ParseBlock(Consume(), BlockNode::DISCARDS_RESULT);
556 if (has_error())
557 return std::unique_ptr<ParseNode>();
558 }
559 }
560
561 if (!left && !has_arg) {
562 // Not a function call, just a standalone identifier.
563 return std::make_unique<IdentifierNode>(token);
564 }
565 std::unique_ptr<FunctionCallNode> func_call =
566 std::make_unique<FunctionCallNode>();
567 func_call->set_function(token);
568 func_call->set_args(std::move(list));
569 if (block)
570 func_call->set_block(std::move(block));
571 return std::move(func_call);
572 }
573
Assignment(std::unique_ptr<ParseNode> left,const Token & token)574 std::unique_ptr<ParseNode> Parser::Assignment(std::unique_ptr<ParseNode> left,
575 const Token& token) {
576 if (left->AsIdentifier() == nullptr && left->AsAccessor() == nullptr) {
577 *err_ = Err(left.get(),
578 "The left-hand side of an assignment must be an identifier, "
579 "scope access, or array access.");
580 return std::unique_ptr<ParseNode>();
581 }
582 std::unique_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT);
583 if (!value) {
584 if (!has_error())
585 *err_ = Err(token, "Expected right-hand side for assignment.");
586 return std::unique_ptr<ParseNode>();
587 }
588 std::unique_ptr<BinaryOpNode> assign = std::make_unique<BinaryOpNode>();
589 assign->set_op(token);
590 assign->set_left(std::move(left));
591 assign->set_right(std::move(value));
592 return std::move(assign);
593 }
594
Subscript(std::unique_ptr<ParseNode> left,const Token & token)595 std::unique_ptr<ParseNode> Parser::Subscript(std::unique_ptr<ParseNode> left,
596 const Token& token) {
597 // TODO: Maybe support more complex expressions like a[0][0]. This would
598 // require work on the evaluator too.
599 if (left->AsIdentifier() == nullptr) {
600 *err_ = Err(
601 left.get(), "May only subscript identifiers.",
602 "The thing on the left hand side of the [] must be an identifier\n"
603 "and not an expression. If you need this, you'll have to assign the\n"
604 "value to a temporary before subscripting. Sorry.");
605 return std::unique_ptr<ParseNode>();
606 }
607 std::unique_ptr<ParseNode> value = ParseExpression();
608 Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript.");
609 std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
610 accessor->set_base(left->AsIdentifier()->value());
611 accessor->set_subscript(std::move(value));
612 return std::move(accessor);
613 }
614
DotOperator(std::unique_ptr<ParseNode> left,const Token & token)615 std::unique_ptr<ParseNode> Parser::DotOperator(std::unique_ptr<ParseNode> left,
616 const Token& token) {
617 if (left->AsIdentifier() == nullptr) {
618 *err_ = Err(
619 left.get(), "May only use \".\" for identifiers.",
620 "The thing on the left hand side of the dot must be an identifier\n"
621 "and not an expression. If you need this, you'll have to assign the\n"
622 "value to a temporary first. Sorry.");
623 return std::unique_ptr<ParseNode>();
624 }
625
626 std::unique_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT);
627 if (!right || !right->AsIdentifier()) {
628 *err_ = Err(
629 token, "Expected identifier for right-hand-side of \".\"",
630 "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()");
631 return std::unique_ptr<ParseNode>();
632 }
633
634 std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>();
635 accessor->set_base(left->AsIdentifier()->value());
636 accessor->set_member(std::unique_ptr<IdentifierNode>(
637 static_cast<IdentifierNode*>(right.release())));
638 return std::move(accessor);
639 }
640
641 // Does not Consume the start or end token.
ParseList(const Token & start_token,Token::Type stop_before,bool allow_trailing_comma)642 std::unique_ptr<ListNode> Parser::ParseList(const Token& start_token,
643 Token::Type stop_before,
644 bool allow_trailing_comma) {
645 std::unique_ptr<ListNode> list = std::make_unique<ListNode>();
646 list->set_begin_token(start_token);
647 bool just_got_comma = false;
648 bool first_time = true;
649 while (!LookAhead(stop_before)) {
650 if (!first_time) {
651 if (!just_got_comma) {
652 // Require commas separate things in lists.
653 *err_ = Err(cur_token(), "Expected comma between items.");
654 return std::unique_ptr<ListNode>();
655 }
656 }
657 first_time = false;
658
659 // Why _OR? We're parsing things that are higher precedence than the ,
660 // that separates the items of the list. , should appear lower than
661 // boolean expressions (the lowest of which is OR), but above assignments.
662 list->append_item(ParseExpression(PRECEDENCE_OR));
663 if (has_error())
664 return std::unique_ptr<ListNode>();
665 if (at_end()) {
666 *err_ =
667 Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list.");
668 return std::unique_ptr<ListNode>();
669 }
670 if (list->contents().back()->AsBlockComment()) {
671 // If there was a comment inside the list, we don't need a comma to the
672 // next item, so pretend we got one, if we're expecting one.
673 just_got_comma = allow_trailing_comma;
674 } else {
675 just_got_comma = Match(Token::COMMA);
676 }
677 }
678 if (just_got_comma && !allow_trailing_comma) {
679 *err_ = Err(cur_token(), "Trailing comma");
680 return std::unique_ptr<ListNode>();
681 }
682 list->set_end(std::make_unique<EndNode>(cur_token()));
683 return list;
684 }
685
ParseFile()686 std::unique_ptr<ParseNode> Parser::ParseFile() {
687 std::unique_ptr<BlockNode> file =
688 std::make_unique<BlockNode>(BlockNode::DISCARDS_RESULT);
689 for (;;) {
690 if (at_end())
691 break;
692 std::unique_ptr<ParseNode> statement = ParseStatement();
693 if (!statement)
694 break;
695 file->append_statement(std::move(statement));
696 }
697 if (!at_end() && !has_error())
698 *err_ = Err(cur_token(), "Unexpected here, should be newline.");
699 if (has_error())
700 return std::unique_ptr<ParseNode>();
701
702 // TODO(scottmg): If this is measurably expensive, it could be done only
703 // when necessary (when reformatting, or during tests). Comments are
704 // separate from the parse tree at this point, so downstream code can remain
705 // ignorant of them.
706 AssignComments(file.get());
707
708 return std::move(file);
709 }
710
ParseStatement()711 std::unique_ptr<ParseNode> Parser::ParseStatement() {
712 if (LookAhead(Token::IF)) {
713 return ParseCondition();
714 } else if (LookAhead(Token::BLOCK_COMMENT)) {
715 return BlockComment(Consume());
716 } else {
717 // TODO(scottmg): Is this too strict? Just drop all the testing if we want
718 // to allow "pointless" expressions and return ParseExpression() directly.
719 std::unique_ptr<ParseNode> stmt = ParseExpression();
720 if (stmt) {
721 if (stmt->AsFunctionCall() || IsAssignment(stmt.get()))
722 return stmt;
723 }
724 if (!has_error()) {
725 const Token& token = cur_or_last_token();
726 *err_ = Err(token, "Expecting assignment or function call.");
727 }
728 return std::unique_ptr<ParseNode>();
729 }
730 }
731
ParseBlock(const Token & begin_brace,BlockNode::ResultMode result_mode)732 std::unique_ptr<BlockNode> Parser::ParseBlock(
733 const Token& begin_brace,
734 BlockNode::ResultMode result_mode) {
735 if (has_error())
736 return std::unique_ptr<BlockNode>();
737 std::unique_ptr<BlockNode> block = std::make_unique<BlockNode>(result_mode);
738 block->set_begin_token(begin_brace);
739
740 for (;;) {
741 if (LookAhead(Token::RIGHT_BRACE)) {
742 block->set_end(std::make_unique<EndNode>(Consume()));
743 break;
744 }
745
746 std::unique_ptr<ParseNode> statement = ParseStatement();
747 if (!statement)
748 return std::unique_ptr<BlockNode>();
749 block->append_statement(std::move(statement));
750 }
751 return block;
752 }
753
ParseCondition()754 std::unique_ptr<ParseNode> Parser::ParseCondition() {
755 std::unique_ptr<ConditionNode> condition = std::make_unique<ConditionNode>();
756 condition->set_if_token(Consume(Token::IF, "Expected 'if'"));
757 Consume(Token::LEFT_PAREN, "Expected '(' after 'if'.");
758 condition->set_condition(ParseExpression());
759 if (IsAssignment(condition->condition()))
760 *err_ = Err(condition->condition(), "Assignment not allowed in 'if'.");
761 Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'.");
762 condition->set_if_true(ParseBlock(
763 Consume(Token::LEFT_BRACE, "Expected '{' to start 'if' block."),
764 BlockNode::DISCARDS_RESULT));
765 if (Match(Token::ELSE)) {
766 if (LookAhead(Token::LEFT_BRACE)) {
767 condition->set_if_false(
768 ParseBlock(Consume(), BlockNode::DISCARDS_RESULT));
769 } else if (LookAhead(Token::IF)) {
770 condition->set_if_false(ParseStatement());
771 } else {
772 *err_ = Err(cur_or_last_token(), "Expected '{' or 'if' after 'else'.");
773 return std::unique_ptr<ParseNode>();
774 }
775 }
776 if (has_error())
777 return std::unique_ptr<ParseNode>();
778 return std::move(condition);
779 }
780
TraverseOrder(const ParseNode * root,std::vector<const ParseNode * > * pre,std::vector<const ParseNode * > * post)781 void Parser::TraverseOrder(const ParseNode* root,
782 std::vector<const ParseNode*>* pre,
783 std::vector<const ParseNode*>* post) {
784 if (root) {
785 pre->push_back(root);
786
787 if (const AccessorNode* accessor = root->AsAccessor()) {
788 TraverseOrder(accessor->subscript(), pre, post);
789 TraverseOrder(accessor->member(), pre, post);
790 } else if (const BinaryOpNode* binop = root->AsBinaryOp()) {
791 TraverseOrder(binop->left(), pre, post);
792 TraverseOrder(binop->right(), pre, post);
793 } else if (const BlockNode* block = root->AsBlock()) {
794 for (const auto& statement : block->statements())
795 TraverseOrder(statement.get(), pre, post);
796 TraverseOrder(block->End(), pre, post);
797 } else if (const ConditionNode* condition = root->AsConditionNode()) {
798 TraverseOrder(condition->condition(), pre, post);
799 TraverseOrder(condition->if_true(), pre, post);
800 TraverseOrder(condition->if_false(), pre, post);
801 } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) {
802 TraverseOrder(func_call->args(), pre, post);
803 TraverseOrder(func_call->block(), pre, post);
804 } else if (root->AsIdentifier()) {
805 // Nothing.
806 } else if (const ListNode* list = root->AsList()) {
807 for (const auto& node : list->contents())
808 TraverseOrder(node.get(), pre, post);
809 TraverseOrder(list->End(), pre, post);
810 } else if (root->AsLiteral()) {
811 // Nothing.
812 } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) {
813 TraverseOrder(unaryop->operand(), pre, post);
814 } else if (root->AsBlockComment()) {
815 // Nothing.
816 } else if (root->AsEnd()) {
817 // Nothing.
818 } else {
819 CHECK(false) << "Unhandled case in TraverseOrder.";
820 }
821
822 post->push_back(root);
823 }
824 }
825
AssignComments(ParseNode * file)826 void Parser::AssignComments(ParseNode* file) {
827 // Start by generating a pre- and post- order traversal of the tree so we
828 // can determine what's before and after comments.
829 std::vector<const ParseNode*> pre;
830 std::vector<const ParseNode*> post;
831 TraverseOrder(file, &pre, &post);
832
833 // Assign line comments to syntax immediately following.
834 int cur_comment = 0;
835 for (auto* node : pre) {
836 if (node->GetRange().is_null()) {
837 CHECK_EQ(node, file) << "Only expected on top file node";
838 continue;
839 }
840 const Location start = node->GetRange().begin();
841 while (cur_comment < static_cast<int>(line_comment_tokens_.size())) {
842 if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) {
843 const_cast<ParseNode*>(node)->comments_mutable()->append_before(
844 line_comment_tokens_[cur_comment]);
845 ++cur_comment;
846 } else {
847 break;
848 }
849 }
850 }
851
852 // Remaining line comments go at end of file.
853 for (; cur_comment < static_cast<int>(line_comment_tokens_.size());
854 ++cur_comment)
855 file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]);
856
857 // Assign suffix to syntax immediately before.
858 cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1);
859 for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin();
860 i != post.rend(); ++i) {
861 // Don't assign suffix comments to the function, list, or block, but instead
862 // to the last thing inside.
863 if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock())
864 continue;
865
866 Location start = (*i)->GetRange().begin();
867 Location end = (*i)->GetRange().end();
868
869 // Don't assign suffix comments to something that starts on an earlier
870 // line, so that in:
871 //
872 // sources = [ "a",
873 // "b" ] # comment
874 //
875 // it's attached to "b", not sources = [ ... ].
876 if (start.line_number() != end.line_number())
877 continue;
878
879 while (cur_comment >= 0) {
880 if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) {
881 const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix(
882 suffix_comment_tokens_[cur_comment]);
883 --cur_comment;
884 } else {
885 break;
886 }
887 }
888
889 // Suffix comments were assigned in reverse, so if there were multiple on
890 // the same node, they need to be reversed.
891 if ((*i)->comments() && !(*i)->comments()->suffix().empty())
892 const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix();
893 }
894 }
895
IndentFor(int value)896 std::string IndentFor(int value) {
897 return std::string(value, ' ');
898 }
899
RenderToText(const base::Value & node,int indent_level,std::ostringstream & os)900 void RenderToText(const base::Value& node,
901 int indent_level,
902 std::ostringstream& os) {
903 const base::Value* child = node.FindKey(std::string("child"));
904 std::string node_type(node.FindKey("type")->GetString());
905 if (node_type == "ACCESSOR") {
906 // AccessorNode is a bit special, in that it holds a Token, not a ParseNode
907 // for the base.
908 os << IndentFor(indent_level) << node_type << std::endl;
909 os << IndentFor(indent_level + 1) << node.FindKey("value")->GetString()
910 << std::endl;
911 } else {
912 os << IndentFor(indent_level) << node_type;
913 if (node.FindKey("value")) {
914 os << "(" << node.FindKey("value")->GetString() << ")";
915 }
916 os << std::endl;
917 }
918 if (node.FindKey(kJsonBeforeComment)) {
919 for (auto& v : node.FindKey(kJsonBeforeComment)->GetList()) {
920 os << IndentFor(indent_level + 1) << "+BEFORE_COMMENT(\"" << v.GetString()
921 << "\")\n";
922 }
923 }
924 if (node.FindKey(kJsonSuffixComment)) {
925 for (auto& v : node.FindKey(kJsonSuffixComment)->GetList()) {
926 os << IndentFor(indent_level + 1) << "+SUFFIX_COMMENT(\"" << v.GetString()
927 << "\")\n";
928 }
929 }
930 if (node.FindKey(kJsonAfterComment)) {
931 for (auto& v : node.FindKey(kJsonAfterComment)->GetList()) {
932 os << IndentFor(indent_level + 1) << "+AFTER_COMMENT(\"" << v.GetString()
933 << "\")\n";
934 }
935 }
936 if (child) {
937 for (const base::Value& n : child->GetList()) {
938 RenderToText(n, indent_level + 1, os);
939 }
940 }
941 }
942