1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28 #include "v8.h"
29
30 #include "api.h"
31 #include "ast.h"
32 #include "bootstrapper.h"
33 #include "compiler.h"
34 #include "platform.h"
35 #include "runtime.h"
36 #include "parser.h"
37 #include "scopes.h"
38 #include "string-stream.h"
39
40 namespace v8 {
41 namespace internal {
42
// Forward declarations for types that the Parser class below only refers to
// through pointers (and, for some of them, friend declarations).
class ParserFactory;
class ParserLog;
class TemporaryScope;
class Target;

template <typename T> class ZoneListWrapper;
49
50
51 // PositionStack is used for on-stack allocation of token positions for
52 // new expressions. Please look at ParseNewExpression.
53
54 class PositionStack {
55 public:
PositionStack(bool * ok)56 explicit PositionStack(bool* ok) : top_(NULL), ok_(ok) {}
~PositionStack()57 ~PositionStack() { ASSERT(!*ok_ || is_empty()); }
58
59 class Element {
60 public:
Element(PositionStack * stack,int value)61 Element(PositionStack* stack, int value) {
62 previous_ = stack->top();
63 value_ = value;
64 stack->set_top(this);
65 }
66
67 private:
previous()68 Element* previous() { return previous_; }
value()69 int value() { return value_; }
70 friend class PositionStack;
71 Element* previous_;
72 int value_;
73 };
74
is_empty()75 bool is_empty() { return top_ == NULL; }
pop()76 int pop() {
77 ASSERT(!is_empty());
78 int result = top_->value();
79 top_ = top_->previous();
80 return result;
81 }
82
83 private:
top()84 Element* top() { return top_; }
set_top(Element * value)85 void set_top(Element* value) { top_ = value; }
86 Element* top_;
87 bool* ok_;
88 };
89
90
// Abstract base class of the parser.  ReportMessageAt() and Declare() are
// pure virtual and must be supplied by a subclass.  The class declares one
// Parse* method per syntactic construct plus the state those methods share
// (scanner, current scope, break/continue target stack, ...).
class Parser {
 public:
  Parser(Handle<Script> script, bool allow_natives_syntax,
         v8::Extension* extension, bool is_pre_parsing,
         ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data);
  virtual ~Parser() { }

  // Pre-parse the program from the character stream; returns true on
  // success, false if a stack-overflow happened during parsing.
  bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream);

  // Error reporting.  ReportMessageAt is implemented by subclasses.
  void ReportMessage(const char* message, Vector<const char*> args);
  virtual void ReportMessageAt(Scanner::Location loc,
                               const char* message,
                               Vector<const char*> args) = 0;


  // Returns NULL if parsing failed.
  FunctionLiteral* ParseProgram(Handle<String> source,
                                unibrow::CharacterStream* stream,
                                bool in_global_context);
  FunctionLiteral* ParseLazy(Handle<String> source,
                             Handle<String> name,
                             int start_position, bool is_expression);

  // The minimum number of contiguous assignments that will
  // be treated as an initialization block.  Benchmarks show that
  // the overhead exceeds the savings below this limit.
  static const int kMinInitializationBlock = 3;

 protected:

  enum Mode {
    PARSE_LAZILY,
    PARSE_EAGERLY
  };

  // Report syntax error
  void ReportUnexpectedToken(Token::Value token);

  // Parser state.
  Handle<Script> script_;
  Scanner scanner_;

  Scope* top_scope_;
  int with_nesting_level_;  // inside_with() reports whether this is > 0.

  TemporaryScope* temp_scope_;  // Maintained by TemporaryScope's ctor/dtor.
  Mode mode_;

  Target* target_stack_;  // for break, continue statements
  bool allow_natives_syntax_;
  v8::Extension* extension_;
  ParserFactory* factory_;
  ParserLog* log_;
  bool is_pre_parsing_;
  ScriptDataImpl* pre_data_;

  // Simple accessors for the state above.
  bool inside_with() const { return with_nesting_level_ > 0; }
  ParserFactory* factory() const { return factory_; }
  ParserLog* log() const { return log_; }
  Scanner& scanner() { return scanner_; }
  Mode mode() const { return mode_; }
  ScriptDataImpl* pre_data() const { return pre_data_; }

  // All ParseXXX functions take as the last argument an *ok parameter
  // which is set to false if parsing failed; it is unchanged otherwise.
  // By making the 'exception handling' explicit, we are forced to check
  // for failure at the call sites.

  // Statements and declarations.
  void* ParseSourceElements(ZoneListWrapper<Statement>* processor,
                            int end_token, bool* ok);
  Statement* ParseStatement(ZoneStringList* labels, bool* ok);
  Statement* ParseFunctionDeclaration(bool* ok);
  Statement* ParseNativeDeclaration(bool* ok);
  Block* ParseBlock(ZoneStringList* labels, bool* ok);
  Block* ParseVariableStatement(bool* ok);
  Block* ParseVariableDeclarations(bool accept_IN, Expression** var, bool* ok);
  Statement* ParseExpressionOrLabelledStatement(ZoneStringList* labels,
                                                bool* ok);
  IfStatement* ParseIfStatement(ZoneStringList* labels, bool* ok);
  Statement* ParseContinueStatement(bool* ok);
  Statement* ParseBreakStatement(ZoneStringList* labels, bool* ok);
  Statement* ParseReturnStatement(bool* ok);
  Block* WithHelper(Expression* obj,
                    ZoneStringList* labels,
                    bool is_catch_block,
                    bool* ok);
  Statement* ParseWithStatement(ZoneStringList* labels, bool* ok);
  CaseClause* ParseCaseClause(bool* default_seen_ptr, bool* ok);
  SwitchStatement* ParseSwitchStatement(ZoneStringList* labels, bool* ok);
  LoopStatement* ParseDoStatement(ZoneStringList* labels, bool* ok);
  LoopStatement* ParseWhileStatement(ZoneStringList* labels, bool* ok);
  Statement* ParseForStatement(ZoneStringList* labels, bool* ok);
  Statement* ParseThrowStatement(bool* ok);
  Expression* MakeCatchContext(Handle<String> id, VariableProxy* value);
  TryStatement* ParseTryStatement(bool* ok);
  DebuggerStatement* ParseDebuggerStatement(bool* ok);

  // Expressions.
  Expression* ParseExpression(bool accept_IN, bool* ok);
  Expression* ParseAssignmentExpression(bool accept_IN, bool* ok);
  Expression* ParseConditionalExpression(bool accept_IN, bool* ok);
  Expression* ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
  Expression* ParseUnaryExpression(bool* ok);
  Expression* ParsePostfixExpression(bool* ok);
  Expression* ParseLeftHandSideExpression(bool* ok);
  Expression* ParseNewExpression(bool* ok);
  Expression* ParseMemberExpression(bool* ok);
  Expression* ParseNewPrefix(PositionStack* stack, bool* ok);
  Expression* ParseMemberWithNewPrefixesExpression(PositionStack* stack,
                                                   bool* ok);
  Expression* ParsePrimaryExpression(bool* ok);
  Expression* ParseArrayLiteral(bool* ok);
  Expression* ParseObjectLiteral(bool* ok);
  Expression* ParseRegExpLiteral(bool seen_equal, bool* ok);

  // Decide if a property should be the object boilerplate.
  bool IsBoilerplateProperty(ObjectLiteral::Property* property);
  // If the expression is a literal, return the literal value;
  // if the expression is a materialized literal and is simple return a
  // compile time value as encoded by CompileTimeValue::GetValue().
  // Otherwise, return undefined literal as the placeholder
  // in the object literal boilerplate.
  Handle<Object> GetBoilerplateValue(Expression* expression);

  enum FunctionLiteralType {
    EXPRESSION,
    DECLARATION,
    NESTED
  };

  ZoneList<Expression*>* ParseArguments(bool* ok);
  FunctionLiteral* ParseFunctionLiteral(Handle<String> var_name,
                                        int function_token_position,
                                        FunctionLiteralType type,
                                        bool* ok);


  // Magical syntax support.
  Expression* ParseV8Intrinsic(bool* ok);

  // Token helpers; peek() and Next() forward directly to the scanner.
  INLINE(Token::Value peek()) { return scanner_.peek(); }
  INLINE(Token::Value Next()) { return scanner_.Next(); }
  INLINE(void Consume(Token::Value token));
  void Expect(Token::Value token, bool* ok);
  void ExpectSemicolon(bool* ok);

  // Get odd-ball literals.
  Literal* GetLiteralUndefined();
  Literal* GetLiteralTheHole();
  Literal* GetLiteralNumber(double value);

  Handle<String> ParseIdentifier(bool* ok);
  Handle<String> ParseIdentifierOrGetOrSet(bool* is_get,
                                           bool* is_set,
                                           bool* ok);

  // Parser support
  virtual VariableProxy* Declare(Handle<String> name, Variable::Mode mode,
                                 FunctionLiteral* fun,
                                 bool resolve,
                                 bool* ok) = 0;

  bool TargetStackContainsLabel(Handle<String> label);
  BreakableStatement* LookupBreakTarget(Handle<String> label, bool* ok);
  IterationStatement* LookupContinueTarget(Handle<String> label, bool* ok);

  void RegisterTargetUse(BreakTarget* target, Target* stop);

  // Create a number literal.
  Literal* NewNumberLiteral(double value);

  // Generate an AST node that throws a ReferenceError with the given type.
  Expression* NewThrowReferenceError(Handle<String> type);

  // Generate an AST node that throws a SyntaxError with the given
  // type.  The first argument may be null (in the handle sense) in
  // which case no arguments are passed to the constructor.
  Expression* NewThrowSyntaxError(Handle<String> type, Handle<Object> first);

  // Generate an AST node that throws a TypeError with the given
  // type.  Both arguments must be non-null (in the handle sense).
  Expression* NewThrowTypeError(Handle<String> type,
                                Handle<Object> first,
                                Handle<Object> second);

  // Generic AST generator for throwing errors from compiled code.
  Expression* NewThrowError(Handle<String> constructor,
                            Handle<String> type,
                            Vector< Handle<Object> > arguments);

  // Helper classes that manipulate the protected parser state directly.
  friend class Target;
  friend class TargetScope;
  friend class LexicalScope;
  friend class TemporaryScope;
};
285
286
287 template <typename T, int initial_size>
288 class BufferedZoneList {
289 public:
290
BufferedZoneList()291 BufferedZoneList() :
292 list_(NULL), last_(NULL) {}
293
294 // Adds element at end of list. This element is buffered and can
295 // be read using last() or removed using RemoveLast until a new Add or until
296 // RemoveLast or GetList has been called.
Add(T * value)297 void Add(T* value) {
298 if (last_ != NULL) {
299 if (list_ == NULL) {
300 list_ = new ZoneList<T*>(initial_size);
301 }
302 list_->Add(last_);
303 }
304 last_ = value;
305 }
306
last()307 T* last() {
308 ASSERT(last_ != NULL);
309 return last_;
310 }
311
RemoveLast()312 T* RemoveLast() {
313 ASSERT(last_ != NULL);
314 T* result = last_;
315 if (list_ != NULL && list_->length() > 0)
316 last_ = list_->RemoveLast();
317 else
318 last_ = NULL;
319 return result;
320 }
321
Get(int i)322 T* Get(int i) {
323 ASSERT(0 <= i && i < length());
324 if (list_ == NULL) {
325 ASSERT_EQ(0, i);
326 return last_;
327 } else {
328 if (i == list_->length()) {
329 ASSERT(last_ != NULL);
330 return last_;
331 } else {
332 return list_->at(i);
333 }
334 }
335 }
336
Clear()337 void Clear() {
338 list_ = NULL;
339 last_ = NULL;
340 }
341
length()342 int length() {
343 int length = (list_ == NULL) ? 0 : list_->length();
344 return length + ((last_ == NULL) ? 0 : 1);
345 }
346
GetList()347 ZoneList<T*>* GetList() {
348 if (list_ == NULL) {
349 list_ = new ZoneList<T*>(initial_size);
350 }
351 if (last_ != NULL) {
352 list_->Add(last_);
353 last_ = NULL;
354 }
355 return list_;
356 }
357
358 private:
359 ZoneList<T*>* list_;
360 T* last_;
361 };
362
363 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
// Collects characters, text elements, terms and alternatives while a regular
// expression is being parsed and assembles them into a RegExpTree on demand.
class RegExpBuilder: public ZoneObject {
 public:
  RegExpBuilder();
  void AddCharacter(uc16 character);
  // "Adds" an empty expression. Does nothing except consume a
  // following quantifier
  void AddEmpty();
  void AddAtom(RegExpTree* tree);
  void AddAssertion(RegExpTree* tree);
  void NewAlternative();  // '|'
  void AddQuantifierToAtom(int min, int max, bool is_greedy);
  RegExpTree* ToRegExp();
 private:
  // Each Flush* function first flushes the level below it:
  // characters_ -> text_ -> terms_ -> alternatives_.
  void FlushCharacters();
  void FlushText();
  void FlushTerms();
  // Set by AddEmpty(); makes the next AddQuantifierToAtom() call a no-op.
  bool pending_empty_;
  // Pending literal characters; NULL when there are none.
  ZoneList<uc16>* characters_;
  BufferedZoneList<RegExpTree, 2> terms_;
  BufferedZoneList<RegExpTree, 2> text_;
  BufferedZoneList<RegExpTree, 2> alternatives_;
#ifdef DEBUG
  // Debug-only record of the most recent kind of addition; checked by the
  // ASSERTs in AddQuantifierToAtom().
  enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
#define LAST(x) last_added_ = x;
#else
#define LAST(x)
#endif
};
392
393
RegExpBuilder()394 RegExpBuilder::RegExpBuilder()
395 : pending_empty_(false),
396 characters_(NULL),
397 terms_(),
398 alternatives_()
399 #ifdef DEBUG
400 , last_added_(ADD_NONE)
401 #endif
402 {}
403
404
FlushCharacters()405 void RegExpBuilder::FlushCharacters() {
406 pending_empty_ = false;
407 if (characters_ != NULL) {
408 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector());
409 characters_ = NULL;
410 text_.Add(atom);
411 LAST(ADD_ATOM);
412 }
413 }
414
415
FlushText()416 void RegExpBuilder::FlushText() {
417 FlushCharacters();
418 int num_text = text_.length();
419 if (num_text == 0) {
420 return;
421 } else if (num_text == 1) {
422 terms_.Add(text_.last());
423 } else {
424 RegExpText* text = new RegExpText();
425 for (int i = 0; i < num_text; i++)
426 text_.Get(i)->AppendToText(text);
427 terms_.Add(text);
428 }
429 text_.Clear();
430 }
431
432
AddCharacter(uc16 c)433 void RegExpBuilder::AddCharacter(uc16 c) {
434 pending_empty_ = false;
435 if (characters_ == NULL) {
436 characters_ = new ZoneList<uc16>(4);
437 }
438 characters_->Add(c);
439 LAST(ADD_CHAR);
440 }
441
442
// Records that an empty expression was just added.  Nothing is appended to
// any list; the flag only makes an immediately following
// AddQuantifierToAtom() call return without doing anything.
void RegExpBuilder::AddEmpty() {
  pending_empty_ = true;
}
446
447
AddAtom(RegExpTree * term)448 void RegExpBuilder::AddAtom(RegExpTree* term) {
449 if (term->IsEmpty()) {
450 AddEmpty();
451 return;
452 }
453 if (term->IsTextElement()) {
454 FlushCharacters();
455 text_.Add(term);
456 } else {
457 FlushText();
458 terms_.Add(term);
459 }
460 LAST(ADD_ATOM);
461 }
462
463
AddAssertion(RegExpTree * assert)464 void RegExpBuilder::AddAssertion(RegExpTree* assert) {
465 FlushText();
466 terms_.Add(assert);
467 LAST(ADD_ASSERT);
468 }
469
470
// Called for '|': closes the current alternative by flushing the collected
// terms into alternatives_.
void RegExpBuilder::NewAlternative() {
  FlushTerms();
}
474
475
FlushTerms()476 void RegExpBuilder::FlushTerms() {
477 FlushText();
478 int num_terms = terms_.length();
479 RegExpTree* alternative;
480 if (num_terms == 0) {
481 alternative = RegExpEmpty::GetInstance();
482 } else if (num_terms == 1) {
483 alternative = terms_.last();
484 } else {
485 alternative = new RegExpAlternative(terms_.GetList());
486 }
487 alternatives_.Add(alternative);
488 terms_.Clear();
489 LAST(ADD_NONE);
490 }
491
492
ToRegExp()493 RegExpTree* RegExpBuilder::ToRegExp() {
494 FlushTerms();
495 int num_alternatives = alternatives_.length();
496 if (num_alternatives == 0) {
497 return RegExpEmpty::GetInstance();
498 }
499 if (num_alternatives == 1) {
500 return alternatives_.last();
501 }
502 return new RegExpDisjunction(alternatives_.GetList());
503 }
504
505
// Wraps the most recently added atom in a RegExpQuantifier(min, max,
// is_greedy) and appends the result to terms_.  The atom is taken from the
// first non-empty source among: pending characters, text_, terms_.
// Does nothing if the last addition was an empty expression.
void RegExpBuilder::AddQuantifierToAtom(int min, int max, bool is_greedy) {
  if (pending_empty_) {
    // The quantifier applies to an empty expression; just consume it.
    pending_empty_ = false;
    return;
  }
  RegExpTree* atom;
  if (characters_ != NULL) {
    ASSERT(last_added_ == ADD_CHAR);
    // Last atom was character.
    Vector<const uc16> char_vector = characters_->ToConstVector();
    int num_chars = char_vector.length();
    if (num_chars > 1) {
      // Only the final character is quantified; the characters before it
      // become an ordinary atom in text_.
      Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
      text_.Add(new RegExpAtom(prefix));
      char_vector = char_vector.SubVector(num_chars - 1, num_chars);
    }
    characters_ = NULL;
    atom = new RegExpAtom(char_vector);
    FlushText();
  } else if (text_.length() > 0) {
    ASSERT(last_added_ == ADD_ATOM);
    // Take the last text element back out and flush the remainder.
    atom = text_.RemoveLast();
    FlushText();
  } else if (terms_.length() > 0) {
    ASSERT(last_added_ == ADD_ATOM);
    atom = terms_.RemoveLast();
    if (atom->max_match() == 0) {
      // Guaranteed to only match an empty string.
      LAST(ADD_TERM);
      if (min == 0) {
        // The atom has already been removed from terms_ and is dropped.
        return;
      }
      // Put the atom back unquantified.
      terms_.Add(atom);
      return;
    }
  } else {
    // Only call immediately after adding an atom or character!
    UNREACHABLE();
    return;
  }
  terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom));
  LAST(ADD_TERM);
}
549
550
// Parser for regular expression patterns.  Reads characters from a
// FlatStringReader and produces a RegExpTree.
class RegExpParser {
 public:
  RegExpParser(FlatStringReader* in,
               Handle<String>* error,
               bool multiline_mode);
  RegExpTree* ParsePattern();
  RegExpTree* ParseDisjunction();
  RegExpTree* ParseGroup();
  RegExpTree* ParseCharacterClass();

  // Parses a {...,...} quantifier and stores the range in the given
  // out parameters.
  bool ParseIntervalQuantifier(int* min_out, int* max_out);

  // Parses and returns a single escaped character.  The character
  // must not be 'b' or 'B' since they are usually handled specially.
  uc32 ParseClassCharacterEscape();

  // Checks whether the following is a length-digit hexadecimal number,
  // and sets the value if it is.
  bool ParseHexEscape(int length, uc32* value);

  uc32 ParseControlLetterEscape();
  uc32 ParseOctalLiteral();

  // Tries to parse the input as a back reference.  If successful it
  // stores the result in the output parameter and returns true.  If
  // it fails it will push back the characters read so the same characters
  // can be reparsed.
  bool ParseBackReferenceIndex(int* index_out);

  CharacterRange ParseClassAtom(uc16* char_class);
  RegExpTree* ReportError(Vector<const char> message);
  void Advance();
  void Advance(int dist);
  void Reset(int pos);

  // Reports whether the pattern might be used as a literal search string.
  // Only use if the result of the parse is a single atom node.
  bool simple();
  bool contains_anchor() { return contains_anchor_; }
  void set_contains_anchor() { contains_anchor_ = true; }
  // Number of entries in captures_, or 0 if the list has not been created.
  int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
  // One less than next_pos_.
  int position() { return next_pos_ - 1; }
  bool failed() { return failed_; }

  static const int kMaxCaptures = 1 << 16;
  static const uc32 kEndMarker = (1 << 21);
 private:
  enum SubexpressionType {
    INITIAL,
    CAPTURE,  // All positive values represent captures.
    POSITIVE_LOOKAHEAD,
    NEGATIVE_LOOKAHEAD,
    GROUPING
  };

  // One entry in a linked stack of parser states.  Each entry creates and
  // holds its own RegExpBuilder.
  class RegExpParserState : public ZoneObject {
   public:
    RegExpParserState(RegExpParserState* previous_state,
                      SubexpressionType group_type,
                      int disjunction_capture_index)
        : previous_state_(previous_state),
          builder_(new RegExpBuilder()),
          group_type_(group_type),
          disjunction_capture_index_(disjunction_capture_index) {}
    // Parser state of containing expression, if any.
    RegExpParserState* previous_state() { return previous_state_; }
    bool IsSubexpression() { return previous_state_ != NULL; }
    // RegExpBuilder building this regexp's AST.
    RegExpBuilder* builder() { return builder_; }
    // Type of regexp being parsed (parenthesized group or entire regexp).
    SubexpressionType group_type() { return group_type_; }
    // Index in captures array of first capture in this sub-expression, if any.
    // Also the capture index of this sub-expression itself, if group_type
    // is CAPTURE.
    int capture_index() { return disjunction_capture_index_; }
   private:
    // Linked list implementation of stack of states.
    RegExpParserState* previous_state_;
    // Builder for the stored disjunction.
    RegExpBuilder* builder_;
    // Stored disjunction type (capture, look-ahead or grouping), if any.
    SubexpressionType group_type_;
    // Stored disjunction's capture index (if any).
    int disjunction_capture_index_;
  };

  uc32 current() { return current_; }
  bool has_more() { return has_more_; }
  // True while next_pos_ is still inside the input.
  bool has_next() { return next_pos_ < in()->length(); }
  uc32 Next();
  FlatStringReader* in() { return in_; }
  void ScanForCaptures();
  uc32 current_;
  bool has_more_;
  bool multiline_;
  int next_pos_;
  FlatStringReader* in_;
  Handle<String>* error_;
  bool simple_;
  bool contains_anchor_;
  ZoneList<RegExpCapture*>* captures_;
  bool is_scanned_for_captures_;
  // The capture count is only valid after we have scanned for captures.
  int capture_count_;
  bool failed_;
};
659
660
661 // A temporary scope stores information during parsing, just like
662 // a plain scope. However, temporary scopes are not kept around
663 // after parsing or referenced by syntax trees so they can be stack-
664 // allocated and hence used by the pre-parser.
665 class TemporaryScope BASE_EMBEDDED {
666 public:
667 explicit TemporaryScope(Parser* parser);
668 ~TemporaryScope();
669
NextMaterializedLiteralIndex()670 int NextMaterializedLiteralIndex() {
671 int next_index =
672 materialized_literal_count_ + JSFunction::kLiteralsPrefixSize;
673 materialized_literal_count_++;
674 return next_index;
675 }
materialized_literal_count()676 int materialized_literal_count() { return materialized_literal_count_; }
677
set_contains_array_literal()678 void set_contains_array_literal() { contains_array_literal_ = true; }
contains_array_literal()679 bool contains_array_literal() { return contains_array_literal_; }
680
SetThisPropertyAssignmentInfo(bool only_this_property_assignments,bool only_simple_this_property_assignments,Handle<FixedArray> this_property_assignments)681 void SetThisPropertyAssignmentInfo(
682 bool only_this_property_assignments,
683 bool only_simple_this_property_assignments,
684 Handle<FixedArray> this_property_assignments) {
685 only_this_property_assignments_ = only_this_property_assignments;
686 only_simple_this_property_assignments_ =
687 only_simple_this_property_assignments;
688 this_property_assignments_ = this_property_assignments;
689 }
only_this_property_assignments()690 bool only_this_property_assignments() {
691 return only_this_property_assignments_;
692 }
only_simple_this_property_assignments()693 bool only_simple_this_property_assignments() {
694 return only_simple_this_property_assignments_;
695 }
this_property_assignments()696 Handle<FixedArray> this_property_assignments() {
697 return this_property_assignments_;
698 }
699
AddProperty()700 void AddProperty() { expected_property_count_++; }
expected_property_count()701 int expected_property_count() { return expected_property_count_; }
702 private:
703 // Captures the number of nodes that need materialization in the
704 // function. regexp literals, and boilerplate for object literals.
705 int materialized_literal_count_;
706
707 // Captures whether or not the function contains array literals. If
708 // the function contains array literals, we have to allocate space
709 // for the array constructor in the literals array of the function.
710 // This array constructor is used when creating the actual array
711 // literals.
712 bool contains_array_literal_;
713
714 // Properties count estimation.
715 int expected_property_count_;
716
717 bool only_this_property_assignments_;
718 bool only_simple_this_property_assignments_;
719 Handle<FixedArray> this_property_assignments_;
720
721 // Bookkeeping
722 Parser* parser_;
723 TemporaryScope* parent_;
724
725 friend class Parser;
726 };
727
728
TemporaryScope(Parser * parser)729 TemporaryScope::TemporaryScope(Parser* parser)
730 : materialized_literal_count_(0),
731 contains_array_literal_(false),
732 expected_property_count_(0),
733 only_this_property_assignments_(false),
734 only_simple_this_property_assignments_(false),
735 this_property_assignments_(Factory::empty_fixed_array()),
736 parser_(parser),
737 parent_(parser->temp_scope_) {
738 parser->temp_scope_ = this;
739 }
740
741
// Restores the temporary scope that was current when this one was created.
TemporaryScope::~TemporaryScope() {
  parser_->temp_scope_ = parent_;
}
745
746
// A zone list wrapper lets code either access a zone list
// or appear to do so while actually ignoring all operations.
749 template <typename T>
750 class ZoneListWrapper {
751 public:
ZoneListWrapper()752 ZoneListWrapper() : list_(NULL) { }
ZoneListWrapper(int size)753 explicit ZoneListWrapper(int size) : list_(new ZoneList<T*>(size)) { }
Add(T * that)754 void Add(T* that) { if (list_) list_->Add(that); }
length()755 int length() { return list_->length(); }
elements()756 ZoneList<T*>* elements() { return list_; }
at(int index)757 T* at(int index) { return list_->at(index); }
758 private:
759 ZoneList<T*>* list_;
760 };
761
762
// Allocation macro that should be used to allocate objects that must
// only be allocated in real parsing mode.  Note that in preparse mode
// not only is the syntax tree not created but the constructor
// arguments are not evaluated either, because the whole `new expr` sits in
// the untaken branch of the conditional.  The macro reads is_pre_parsing_
// from the scope in which it is expanded.
#define NEW(expr) (is_pre_parsing_ ? NULL : new expr)
768
769
770 class ParserFactory BASE_EMBEDDED {
771 public:
ParserFactory(bool is_pre_parsing)772 explicit ParserFactory(bool is_pre_parsing) :
773 is_pre_parsing_(is_pre_parsing) { }
774
~ParserFactory()775 virtual ~ParserFactory() { }
776
777 virtual Scope* NewScope(Scope* parent, Scope::Type type, bool inside_with);
778
LookupSymbol(const char * string,int length)779 virtual Handle<String> LookupSymbol(const char* string, int length) {
780 return Handle<String>();
781 }
782
EmptySymbol()783 virtual Handle<String> EmptySymbol() {
784 return Handle<String>();
785 }
786
NewProperty(Expression * obj,Expression * key,int pos)787 virtual Expression* NewProperty(Expression* obj, Expression* key, int pos) {
788 if (obj == VariableProxySentinel::this_proxy()) {
789 return Property::this_property();
790 } else {
791 return ValidLeftHandSideSentinel::instance();
792 }
793 }
794
NewCall(Expression * expression,ZoneList<Expression * > * arguments,int pos)795 virtual Expression* NewCall(Expression* expression,
796 ZoneList<Expression*>* arguments,
797 int pos) {
798 return Call::sentinel();
799 }
800
NewCallEval(Expression * expression,ZoneList<Expression * > * arguments,int pos)801 virtual Expression* NewCallEval(Expression* expression,
802 ZoneList<Expression*>* arguments,
803 int pos) {
804 return CallEval::sentinel();
805 }
806
EmptyStatement()807 virtual Statement* EmptyStatement() {
808 return NULL;
809 }
810
NewList(int size)811 template <typename T> ZoneListWrapper<T> NewList(int size) {
812 return is_pre_parsing_ ? ZoneListWrapper<T>() : ZoneListWrapper<T>(size);
813 }
814
815 private:
816 bool is_pre_parsing_;
817 };
818
819
// Base class for logging information during parsing.  The implementations
// here record nothing.
class ParserLog BASE_EMBEDDED {
 public:
  virtual ~ParserLog() { }

  // Records the occurrence of a function.  The returned object is
  // only guaranteed to be valid until the next function has been
  // logged.  This default returns a default-constructed entry.
  virtual FunctionEntry LogFunction(int start) { return FunctionEntry(); }

  // Called to record an error; does nothing by default.
  virtual void LogError() { }
};
831
832
// ParserFactory that creates real objects: symbols come from Factory and
// Property / Call / CallEval nodes are allocated with new.  It passes
// is_pre_parsing == false to the base class.
class AstBuildingParserFactory : public ParserFactory {
 public:
  AstBuildingParserFactory() : ParserFactory(false) { }

  virtual Scope* NewScope(Scope* parent, Scope::Type type, bool inside_with);

  // Looks up the symbol for the first |length| characters of |string|.
  virtual Handle<String> LookupSymbol(const char* string, int length) {
    return Factory::LookupSymbol(Vector<const char>(string, length));
  }

  virtual Handle<String> EmptySymbol() {
    return Factory::empty_symbol();
  }

  virtual Expression* NewProperty(Expression* obj, Expression* key, int pos) {
    return new Property(obj, key, pos);
  }

  virtual Expression* NewCall(Expression* expression,
                              ZoneList<Expression*>* arguments,
                              int pos) {
    return new Call(expression, arguments, pos);
  }

  virtual Expression* NewCallEval(Expression* expression,
                                  ZoneList<Expression*>* arguments,
                                  int pos) {
    return new CallEval(expression, arguments, pos);
  }

  virtual Statement* EmptyStatement();
};
865
866
// ParserLog that records into a flat list of unsigned words (store_).  The
// list starts with a header of ScriptDataImpl::kHeaderSize words written by
// the constructor.
class ParserRecorder: public ParserLog {
 public:
  ParserRecorder();
  virtual FunctionEntry LogFunction(int start);
  virtual void LogError() { }
  // Records an error message with its source location and arguments.
  virtual void LogMessage(Scanner::Location loc,
                          const char* message,
                          Vector<const char*> args);
  // Appends |str| to the store as a length word followed by one word per
  // character.
  void WriteString(Vector<const char> str);
  // Decodes a string in the WriteString() format starting at |start| into a
  // newly allocated, NUL-terminated array.  If |chars| is non-NULL it
  // receives the string length.
  static const char* ReadString(unsigned* start, int* chars);
  List<unsigned>* store() { return &store_; }
 private:
  // Checked by LogMessage() and LogFunction() before they record anything.
  bool has_error_;
  List<unsigned> store_;
};
882
883
GetFunctionEnd(int start)884 FunctionEntry ScriptDataImpl::GetFunctionEnd(int start) {
885 if (nth(last_entry_).start_pos() > start) {
886 // If the last entry we looked up is higher than what we're
887 // looking for then it's useless and we reset it.
888 last_entry_ = 0;
889 }
890 for (int i = last_entry_; i < EntryCount(); i++) {
891 FunctionEntry entry = nth(i);
892 if (entry.start_pos() == start) {
893 last_entry_ = i;
894 return entry;
895 }
896 }
897 return FunctionEntry();
898 }
899
900
SanityCheck()901 bool ScriptDataImpl::SanityCheck() {
902 if (store_.length() < static_cast<int>(ScriptDataImpl::kHeaderSize))
903 return false;
904 if (magic() != ScriptDataImpl::kMagicNumber)
905 return false;
906 if (version() != ScriptDataImpl::kCurrentVersion)
907 return false;
908 return true;
909 }
910
911
// Number of FunctionEntry-sized records that fit in the store after the
// header.
int ScriptDataImpl::EntryCount() {
  return (store_.length() - kHeaderSize) / FunctionEntry::kSize;
}
915
916
nth(int n)917 FunctionEntry ScriptDataImpl::nth(int n) {
918 int offset = kHeaderSize + n * FunctionEntry::kSize;
919 return FunctionEntry(Vector<unsigned>(store_.start() + offset,
920 FunctionEntry::kSize));
921 }
922
923
ParserRecorder()924 ParserRecorder::ParserRecorder()
925 : has_error_(false), store_(4) {
926 Vector<unsigned> preamble = store()->AddBlock(0, ScriptDataImpl::kHeaderSize);
927 preamble[ScriptDataImpl::kMagicOffset] = ScriptDataImpl::kMagicNumber;
928 preamble[ScriptDataImpl::kVersionOffset] = ScriptDataImpl::kCurrentVersion;
929 preamble[ScriptDataImpl::kHasErrorOffset] = false;
930 }
931
932
WriteString(Vector<const char> str)933 void ParserRecorder::WriteString(Vector<const char> str) {
934 store()->Add(str.length());
935 for (int i = 0; i < str.length(); i++)
936 store()->Add(str[i]);
937 }
938
939
ReadString(unsigned * start,int * chars)940 const char* ParserRecorder::ReadString(unsigned* start, int* chars) {
941 int length = start[0];
942 char* result = NewArray<char>(length + 1);
943 for (int i = 0; i < length; i++)
944 result[i] = start[i + 1];
945 result[length] = '\0';
946 if (chars != NULL) *chars = length;
947 return result;
948 }
949
950
LogMessage(Scanner::Location loc,const char * message,Vector<const char * > args)951 void ParserRecorder::LogMessage(Scanner::Location loc, const char* message,
952 Vector<const char*> args) {
953 if (has_error_) return;
954 store()->Rewind(ScriptDataImpl::kHeaderSize);
955 store()->at(ScriptDataImpl::kHasErrorOffset) = true;
956 store()->Add(loc.beg_pos);
957 store()->Add(loc.end_pos);
958 store()->Add(args.length());
959 WriteString(CStrVector(message));
960 for (int i = 0; i < args.length(); i++)
961 WriteString(CStrVector(args[i]));
962 }
963
964
MessageLocation()965 Scanner::Location ScriptDataImpl::MessageLocation() {
966 int beg_pos = Read(0);
967 int end_pos = Read(1);
968 return Scanner::Location(beg_pos, end_pos);
969 }
970
971
BuildMessage()972 const char* ScriptDataImpl::BuildMessage() {
973 unsigned* start = ReadAddress(3);
974 return ParserRecorder::ReadString(start, NULL);
975 }
976
977
BuildArgs()978 Vector<const char*> ScriptDataImpl::BuildArgs() {
979 int arg_count = Read(2);
980 const char** array = NewArray<const char*>(arg_count);
981 int pos = ScriptDataImpl::kHeaderSize + Read(3);
982 for (int i = 0; i < arg_count; i++) {
983 int count = 0;
984 array[i] = ParserRecorder::ReadString(ReadAddress(pos), &count);
985 pos += count + 1;
986 }
987 return Vector<const char*>(array, arg_count);
988 }
989
990
Read(int position)991 unsigned ScriptDataImpl::Read(int position) {
992 return store_[ScriptDataImpl::kHeaderSize + position];
993 }
994
995
ReadAddress(int position)996 unsigned* ScriptDataImpl::ReadAddress(int position) {
997 return &store_[ScriptDataImpl::kHeaderSize + position];
998 }
999
1000
LogFunction(int start)1001 FunctionEntry ParserRecorder::LogFunction(int start) {
1002 if (has_error_) return FunctionEntry();
1003 FunctionEntry result(store()->AddBlock(0, FunctionEntry::kSize));
1004 result.set_start_pos(start);
1005 return result;
1006 }
1007
1008
1009 class AstBuildingParser : public Parser {
1010 public:
AstBuildingParser(Handle<Script> script,bool allow_natives_syntax,v8::Extension * extension,ScriptDataImpl * pre_data)1011 AstBuildingParser(Handle<Script> script, bool allow_natives_syntax,
1012 v8::Extension* extension, ScriptDataImpl* pre_data)
1013 : Parser(script, allow_natives_syntax, extension, false,
1014 factory(), log(), pre_data) { }
1015 virtual void ReportMessageAt(Scanner::Location loc, const char* message,
1016 Vector<const char*> args);
1017 virtual VariableProxy* Declare(Handle<String> name, Variable::Mode mode,
1018 FunctionLiteral* fun, bool resolve, bool* ok);
factory()1019 AstBuildingParserFactory* factory() { return &factory_; }
log()1020 ParserLog* log() { return &log_; }
1021
1022 private:
1023 ParserLog log_;
1024 AstBuildingParserFactory factory_;
1025 };
1026
1027
1028 class PreParser : public Parser {
1029 public:
PreParser(Handle<Script> script,bool allow_natives_syntax,v8::Extension * extension)1030 PreParser(Handle<Script> script, bool allow_natives_syntax,
1031 v8::Extension* extension)
1032 : Parser(script, allow_natives_syntax, extension, true,
1033 factory(), recorder(), NULL)
1034 , factory_(true) { }
1035 virtual void ReportMessageAt(Scanner::Location loc, const char* message,
1036 Vector<const char*> args);
1037 virtual VariableProxy* Declare(Handle<String> name, Variable::Mode mode,
1038 FunctionLiteral* fun, bool resolve, bool* ok);
factory()1039 ParserFactory* factory() { return &factory_; }
recorder()1040 ParserRecorder* recorder() { return &recorder_; }
1041
1042 private:
1043 ParserRecorder recorder_;
1044 ParserFactory factory_;
1045 };
1046
1047
NewScope(Scope * parent,Scope::Type type,bool inside_with)1048 Scope* AstBuildingParserFactory::NewScope(Scope* parent, Scope::Type type,
1049 bool inside_with) {
1050 Scope* result = new Scope(parent, type);
1051 result->Initialize(inside_with);
1052 return result;
1053 }
1054
1055
EmptyStatement()1056 Statement* AstBuildingParserFactory::EmptyStatement() {
1057 // Use a statically allocated empty statement singleton to avoid
1058 // allocating lots and lots of empty statements.
1059 static v8::internal::EmptyStatement empty;
1060 return ∅
1061 }
1062
1063
NewScope(Scope * parent,Scope::Type type,bool inside_with)1064 Scope* ParserFactory::NewScope(Scope* parent, Scope::Type type,
1065 bool inside_with) {
1066 ASSERT(parent != NULL);
1067 parent->type_ = type;
1068 return parent;
1069 }
1070
1071
Declare(Handle<String> name,Variable::Mode mode,FunctionLiteral * fun,bool resolve,bool * ok)1072 VariableProxy* PreParser::Declare(Handle<String> name, Variable::Mode mode,
1073 FunctionLiteral* fun, bool resolve,
1074 bool* ok) {
1075 return NULL;
1076 }
1077
1078
1079
1080 // ----------------------------------------------------------------------------
1081 // Target is a support class to facilitate manipulation of the
1082 // Parser's target_stack_ (the stack of potential 'break' and
1083 // 'continue' statement targets). Upon construction, a new target is
1084 // added; it is removed upon destruction.
1085
1086 class Target BASE_EMBEDDED {
1087 public:
Target(Parser * parser,AstNode * node)1088 Target(Parser* parser, AstNode* node)
1089 : parser_(parser), node_(node), previous_(parser_->target_stack_) {
1090 parser_->target_stack_ = this;
1091 }
1092
~Target()1093 ~Target() {
1094 parser_->target_stack_ = previous_;
1095 }
1096
previous()1097 Target* previous() { return previous_; }
node()1098 AstNode* node() { return node_; }
1099
1100 private:
1101 Parser* parser_;
1102 AstNode* node_;
1103 Target* previous_;
1104 };
1105
1106
1107 class TargetScope BASE_EMBEDDED {
1108 public:
TargetScope(Parser * parser)1109 explicit TargetScope(Parser* parser)
1110 : parser_(parser), previous_(parser->target_stack_) {
1111 parser->target_stack_ = NULL;
1112 }
1113
~TargetScope()1114 ~TargetScope() {
1115 parser_->target_stack_ = previous_;
1116 }
1117
1118 private:
1119 Parser* parser_;
1120 Target* previous_;
1121 };
1122
1123
1124 // ----------------------------------------------------------------------------
1125 // LexicalScope is a support class to facilitate manipulation of the
1126 // Parser's scope stack. The constructor sets the parser's top scope
1127 // to the incoming scope, and the destructor resets it.
1128
1129 class LexicalScope BASE_EMBEDDED {
1130 public:
LexicalScope(Parser * parser,Scope * scope)1131 LexicalScope(Parser* parser, Scope* scope)
1132 : parser_(parser),
1133 prev_scope_(parser->top_scope_),
1134 prev_level_(parser->with_nesting_level_) {
1135 parser_->top_scope_ = scope;
1136 parser_->with_nesting_level_ = 0;
1137 }
1138
~LexicalScope()1139 ~LexicalScope() {
1140 parser_->top_scope_ = prev_scope_;
1141 parser_->with_nesting_level_ = prev_level_;
1142 }
1143
1144 private:
1145 Parser* parser_;
1146 Scope* prev_scope_;
1147 int prev_level_;
1148 };
1149
1150
// ----------------------------------------------------------------------------
// The CHECK_OK macro is a convenient macro to enforce error
// handling for functions that may fail (by returning !*ok).
//
// It is written in place of the final 'ok' argument of a call, e.g.
//   Statement* stat = ParseStatement(NULL, CHECK_OK);
// The macro supplies 'ok', closes the call, tests *ok and returns NULL on
// failure.  The trailing '((void)0' is completed by the ');' that follows
// the macro at the call site.
//
// CAUTION: This macro appends extra statements after a call,
// thus it must never be used where only a single statement
// is correct (e.g. an if statement branch w/o braces)!

#define CHECK_OK ok); \
  if (!*ok) return NULL; \
  ((void)0
#define DUMMY )  // to make indentation work
#undef DUMMY

// CHECK_FAILED works the same way but passes no extra argument (the /**/
// is an empty comment) and tests 'failed_' instead of '*ok'.  The same
// caution about single-statement contexts applies.
#define CHECK_FAILED /**/); \
  if (failed_) return NULL; \
  ((void)0
#define DUMMY )  // to make indentation work
#undef DUMMY
1170
1171 // ----------------------------------------------------------------------------
1172 // Implementation of Parser
1173
Parser(Handle<Script> script,bool allow_natives_syntax,v8::Extension * extension,bool is_pre_parsing,ParserFactory * factory,ParserLog * log,ScriptDataImpl * pre_data)1174 Parser::Parser(Handle<Script> script,
1175 bool allow_natives_syntax,
1176 v8::Extension* extension,
1177 bool is_pre_parsing,
1178 ParserFactory* factory,
1179 ParserLog* log,
1180 ScriptDataImpl* pre_data)
1181 : script_(script),
1182 scanner_(is_pre_parsing),
1183 top_scope_(NULL),
1184 with_nesting_level_(0),
1185 temp_scope_(NULL),
1186 target_stack_(NULL),
1187 allow_natives_syntax_(allow_natives_syntax),
1188 extension_(extension),
1189 factory_(factory),
1190 log_(log),
1191 is_pre_parsing_(is_pre_parsing),
1192 pre_data_(pre_data) {
1193 }
1194
1195
PreParseProgram(Handle<String> source,unibrow::CharacterStream * stream)1196 bool Parser::PreParseProgram(Handle<String> source,
1197 unibrow::CharacterStream* stream) {
1198 HistogramTimerScope timer(&Counters::pre_parse);
1199 StackGuard guard;
1200 AssertNoZoneAllocation assert_no_zone_allocation;
1201 AssertNoAllocation assert_no_allocation;
1202 NoHandleAllocation no_handle_allocation;
1203 scanner_.Init(source, stream, 0);
1204 ASSERT(target_stack_ == NULL);
1205 mode_ = PARSE_EAGERLY;
1206 DummyScope top_scope;
1207 LexicalScope scope(this, &top_scope);
1208 TemporaryScope temp_scope(this);
1209 ZoneListWrapper<Statement> processor;
1210 bool ok = true;
1211 ParseSourceElements(&processor, Token::EOS, &ok);
1212 return !scanner().stack_overflow();
1213 }
1214
1215
ParseProgram(Handle<String> source,unibrow::CharacterStream * stream,bool in_global_context)1216 FunctionLiteral* Parser::ParseProgram(Handle<String> source,
1217 unibrow::CharacterStream* stream,
1218 bool in_global_context) {
1219 CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT);
1220
1221 HistogramTimerScope timer(&Counters::parse);
1222 Counters::total_parse_size.Increment(source->length());
1223
1224 // Initialize parser state.
1225 source->TryFlattenIfNotFlat();
1226 scanner_.Init(source, stream, 0);
1227 ASSERT(target_stack_ == NULL);
1228
1229 // Compute the parsing mode.
1230 mode_ = FLAG_lazy ? PARSE_LAZILY : PARSE_EAGERLY;
1231 if (allow_natives_syntax_ || extension_ != NULL) mode_ = PARSE_EAGERLY;
1232
1233 Scope::Type type =
1234 in_global_context
1235 ? Scope::GLOBAL_SCOPE
1236 : Scope::EVAL_SCOPE;
1237 Handle<String> no_name = factory()->EmptySymbol();
1238
1239 FunctionLiteral* result = NULL;
1240 { Scope* scope = factory()->NewScope(top_scope_, type, inside_with());
1241 LexicalScope lexical_scope(this, scope);
1242 TemporaryScope temp_scope(this);
1243 ZoneListWrapper<Statement> body(16);
1244 bool ok = true;
1245 ParseSourceElements(&body, Token::EOS, &ok);
1246 if (ok) {
1247 result = NEW(FunctionLiteral(
1248 no_name,
1249 top_scope_,
1250 body.elements(),
1251 temp_scope.materialized_literal_count(),
1252 temp_scope.contains_array_literal(),
1253 temp_scope.expected_property_count(),
1254 temp_scope.only_this_property_assignments(),
1255 temp_scope.only_simple_this_property_assignments(),
1256 temp_scope.this_property_assignments(),
1257 0,
1258 0,
1259 source->length(),
1260 false));
1261 } else if (scanner().stack_overflow()) {
1262 Top::StackOverflow();
1263 }
1264 }
1265
1266 // Make sure the target stack is empty.
1267 ASSERT(target_stack_ == NULL);
1268
1269 // If there was a syntax error we have to get rid of the AST
1270 // and it is not safe to do so before the scope has been deleted.
1271 if (result == NULL) zone_scope.DeleteOnExit();
1272 return result;
1273 }
1274
1275
ParseLazy(Handle<String> source,Handle<String> name,int start_position,bool is_expression)1276 FunctionLiteral* Parser::ParseLazy(Handle<String> source,
1277 Handle<String> name,
1278 int start_position,
1279 bool is_expression) {
1280 CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT);
1281 HistogramTimerScope timer(&Counters::parse_lazy);
1282 source->TryFlattenIfNotFlat();
1283 Counters::total_parse_size.Increment(source->length());
1284 SafeStringInputBuffer buffer(source.location());
1285
1286 // Initialize parser state.
1287 scanner_.Init(source, &buffer, start_position);
1288 ASSERT(target_stack_ == NULL);
1289 mode_ = PARSE_EAGERLY;
1290
1291 // Place holder for the result.
1292 FunctionLiteral* result = NULL;
1293
1294 {
1295 // Parse the function literal.
1296 Handle<String> no_name = factory()->EmptySymbol();
1297 Scope* scope =
1298 factory()->NewScope(top_scope_, Scope::GLOBAL_SCOPE, inside_with());
1299 LexicalScope lexical_scope(this, scope);
1300 TemporaryScope temp_scope(this);
1301
1302 FunctionLiteralType type = is_expression ? EXPRESSION : DECLARATION;
1303 bool ok = true;
1304 result = ParseFunctionLiteral(name, RelocInfo::kNoPosition, type, &ok);
1305 // Make sure the results agree.
1306 ASSERT(ok == (result != NULL));
1307 // The only errors should be stack overflows.
1308 ASSERT(ok || scanner_.stack_overflow());
1309 }
1310
1311 // Make sure the target stack is empty.
1312 ASSERT(target_stack_ == NULL);
1313
1314 // If there was a stack overflow we have to get rid of AST and it is
1315 // not safe to do before scope has been deleted.
1316 if (result == NULL) {
1317 Top::StackOverflow();
1318 zone_scope.DeleteOnExit();
1319 }
1320 return result;
1321 }
1322
1323
ReportMessage(const char * type,Vector<const char * > args)1324 void Parser::ReportMessage(const char* type, Vector<const char*> args) {
1325 Scanner::Location source_location = scanner_.location();
1326 ReportMessageAt(source_location, type, args);
1327 }
1328
1329
ReportMessageAt(Scanner::Location source_location,const char * type,Vector<const char * > args)1330 void AstBuildingParser::ReportMessageAt(Scanner::Location source_location,
1331 const char* type,
1332 Vector<const char*> args) {
1333 MessageLocation location(script_,
1334 source_location.beg_pos, source_location.end_pos);
1335 Handle<JSArray> array = Factory::NewJSArray(args.length());
1336 for (int i = 0; i < args.length(); i++) {
1337 SetElement(array, i, Factory::NewStringFromUtf8(CStrVector(args[i])));
1338 }
1339 Handle<Object> result = Factory::NewSyntaxError(type, array);
1340 Top::Throw(*result, &location);
1341 }
1342
1343
ReportMessageAt(Scanner::Location source_location,const char * type,Vector<const char * > args)1344 void PreParser::ReportMessageAt(Scanner::Location source_location,
1345 const char* type,
1346 Vector<const char*> args) {
1347 recorder()->LogMessage(source_location, type, args);
1348 }
1349
1350
1351 // Base class containing common code for the different finder classes used by
1352 // the parser.
1353 class ParserFinder {
1354 protected:
ParserFinder()1355 ParserFinder() {}
AsAssignment(Statement * stat)1356 static Assignment* AsAssignment(Statement* stat) {
1357 if (stat == NULL) return NULL;
1358 ExpressionStatement* exp_stat = stat->AsExpressionStatement();
1359 if (exp_stat == NULL) return NULL;
1360 return exp_stat->expression()->AsAssignment();
1361 }
1362 };
1363
1364
1365 // An InitializationBlockFinder finds and marks sequences of statements of the
1366 // form x.y.z.a = ...; x.y.z.b = ...; etc.
1367 class InitializationBlockFinder : public ParserFinder {
1368 public:
InitializationBlockFinder()1369 InitializationBlockFinder()
1370 : first_in_block_(NULL), last_in_block_(NULL), block_size_(0) {}
1371
~InitializationBlockFinder()1372 ~InitializationBlockFinder() {
1373 if (InBlock()) EndBlock();
1374 }
1375
Update(Statement * stat)1376 void Update(Statement* stat) {
1377 Assignment* assignment = AsAssignment(stat);
1378 if (InBlock()) {
1379 if (BlockContinues(assignment)) {
1380 UpdateBlock(assignment);
1381 } else {
1382 EndBlock();
1383 }
1384 }
1385 if (!InBlock() && (assignment != NULL) &&
1386 (assignment->op() == Token::ASSIGN)) {
1387 StartBlock(assignment);
1388 }
1389 }
1390
1391 private:
1392 // Returns true if the expressions appear to denote the same object.
1393 // In the context of initialization blocks, we only consider expressions
1394 // of the form 'x.y.z'.
SameObject(Expression * e1,Expression * e2)1395 static bool SameObject(Expression* e1, Expression* e2) {
1396 VariableProxy* v1 = e1->AsVariableProxy();
1397 VariableProxy* v2 = e2->AsVariableProxy();
1398 if (v1 != NULL && v2 != NULL) {
1399 return v1->name()->Equals(*v2->name());
1400 }
1401 Property* p1 = e1->AsProperty();
1402 Property* p2 = e2->AsProperty();
1403 if ((p1 == NULL) || (p2 == NULL)) return false;
1404 Literal* key1 = p1->key()->AsLiteral();
1405 Literal* key2 = p2->key()->AsLiteral();
1406 if ((key1 == NULL) || (key2 == NULL)) return false;
1407 if (!key1->handle()->IsString() || !key2->handle()->IsString()) {
1408 return false;
1409 }
1410 String* name1 = String::cast(*key1->handle());
1411 String* name2 = String::cast(*key2->handle());
1412 if (!name1->Equals(name2)) return false;
1413 return SameObject(p1->obj(), p2->obj());
1414 }
1415
1416 // Returns true if the expressions appear to denote different properties
1417 // of the same object.
PropertyOfSameObject(Expression * e1,Expression * e2)1418 static bool PropertyOfSameObject(Expression* e1, Expression* e2) {
1419 Property* p1 = e1->AsProperty();
1420 Property* p2 = e2->AsProperty();
1421 if ((p1 == NULL) || (p2 == NULL)) return false;
1422 return SameObject(p1->obj(), p2->obj());
1423 }
1424
BlockContinues(Assignment * assignment)1425 bool BlockContinues(Assignment* assignment) {
1426 if ((assignment == NULL) || (first_in_block_ == NULL)) return false;
1427 if (assignment->op() != Token::ASSIGN) return false;
1428 return PropertyOfSameObject(first_in_block_->target(),
1429 assignment->target());
1430 }
1431
StartBlock(Assignment * assignment)1432 void StartBlock(Assignment* assignment) {
1433 first_in_block_ = assignment;
1434 last_in_block_ = assignment;
1435 block_size_ = 1;
1436 }
1437
UpdateBlock(Assignment * assignment)1438 void UpdateBlock(Assignment* assignment) {
1439 last_in_block_ = assignment;
1440 ++block_size_;
1441 }
1442
EndBlock()1443 void EndBlock() {
1444 if (block_size_ >= Parser::kMinInitializationBlock) {
1445 first_in_block_->mark_block_start();
1446 last_in_block_->mark_block_end();
1447 }
1448 last_in_block_ = first_in_block_ = NULL;
1449 block_size_ = 0;
1450 }
1451
InBlock()1452 bool InBlock() { return first_in_block_ != NULL; }
1453
1454 Assignment* first_in_block_;
1455 Assignment* last_in_block_;
1456 int block_size_;
1457
1458 DISALLOW_COPY_AND_ASSIGN(InitializationBlockFinder);
1459 };
1460
1461
1462 // A ThisNamedPropertyAssigmentFinder finds and marks statements of the form
1463 // this.x = ...;, where x is a named property. It also determines whether a
1464 // function contains only assignments of this type.
1465 class ThisNamedPropertyAssigmentFinder : public ParserFinder {
1466 public:
ThisNamedPropertyAssigmentFinder()1467 ThisNamedPropertyAssigmentFinder()
1468 : only_this_property_assignments_(true),
1469 only_simple_this_property_assignments_(true),
1470 names_(NULL),
1471 assigned_arguments_(NULL),
1472 assigned_constants_(NULL) {}
1473
Update(Scope * scope,Statement * stat)1474 void Update(Scope* scope, Statement* stat) {
1475 // Bail out if function already has non this property assignment
1476 // statements.
1477 if (!only_this_property_assignments_) {
1478 return;
1479 }
1480
1481 // Check whether this statement is of the form this.x = ...;
1482 Assignment* assignment = AsAssignment(stat);
1483 if (IsThisPropertyAssignment(assignment)) {
1484 HandleThisPropertyAssignment(scope, assignment);
1485 } else {
1486 only_this_property_assignments_ = false;
1487 only_simple_this_property_assignments_ = false;
1488 }
1489 }
1490
1491 // Returns whether only statements of the form this.x = ...; was encountered.
only_this_property_assignments()1492 bool only_this_property_assignments() {
1493 return only_this_property_assignments_;
1494 }
1495
1496 // Returns whether only statements of the form this.x = y; where y is either a
1497 // constant or a function argument was encountered.
only_simple_this_property_assignments()1498 bool only_simple_this_property_assignments() {
1499 return only_simple_this_property_assignments_;
1500 }
1501
1502 // Returns a fixed array containing three elements for each assignment of the
1503 // form this.x = y;
GetThisPropertyAssignments()1504 Handle<FixedArray> GetThisPropertyAssignments() {
1505 if (names_ == NULL) {
1506 return Factory::empty_fixed_array();
1507 }
1508 ASSERT(names_ != NULL);
1509 ASSERT(assigned_arguments_ != NULL);
1510 ASSERT_EQ(names_->length(), assigned_arguments_->length());
1511 ASSERT_EQ(names_->length(), assigned_constants_->length());
1512 Handle<FixedArray> assignments =
1513 Factory::NewFixedArray(names_->length() * 3);
1514 for (int i = 0; i < names_->length(); i++) {
1515 assignments->set(i * 3, *names_->at(i));
1516 assignments->set(i * 3 + 1, Smi::FromInt(assigned_arguments_->at(i)));
1517 assignments->set(i * 3 + 2, *assigned_constants_->at(i));
1518 }
1519 return assignments;
1520 }
1521
1522 private:
IsThisPropertyAssignment(Assignment * assignment)1523 bool IsThisPropertyAssignment(Assignment* assignment) {
1524 if (assignment != NULL) {
1525 Property* property = assignment->target()->AsProperty();
1526 return assignment->op() == Token::ASSIGN
1527 && property != NULL
1528 && property->obj()->AsVariableProxy() != NULL
1529 && property->obj()->AsVariableProxy()->is_this();
1530 }
1531 return false;
1532 }
1533
HandleThisPropertyAssignment(Scope * scope,Assignment * assignment)1534 void HandleThisPropertyAssignment(Scope* scope, Assignment* assignment) {
1535 // Check that the property assigned to is a named property.
1536 Property* property = assignment->target()->AsProperty();
1537 ASSERT(property != NULL);
1538 Literal* literal = property->key()->AsLiteral();
1539 uint32_t dummy;
1540 if (literal != NULL &&
1541 literal->handle()->IsString() &&
1542 !String::cast(*(literal->handle()))->AsArrayIndex(&dummy)) {
1543 Handle<String> key = Handle<String>::cast(literal->handle());
1544
1545 // Check whether the value assigned is either a constant or matches the
1546 // name of one of the arguments to the function.
1547 if (assignment->value()->AsLiteral() != NULL) {
1548 // Constant assigned.
1549 Literal* literal = assignment->value()->AsLiteral();
1550 AssignmentFromConstant(key, literal->handle());
1551 } else if (assignment->value()->AsVariableProxy() != NULL) {
1552 // Variable assigned.
1553 Handle<String> name =
1554 assignment->value()->AsVariableProxy()->name();
1555 // Check whether the variable assigned matches an argument name.
1556 int index = -1;
1557 for (int i = 0; i < scope->num_parameters(); i++) {
1558 if (*scope->parameter(i)->name() == *name) {
1559 // Assigned from function argument.
1560 index = i;
1561 break;
1562 }
1563 }
1564 if (index != -1) {
1565 AssignmentFromParameter(key, index);
1566 } else {
1567 AssignmentFromSomethingElse(key);
1568 }
1569 } else {
1570 AssignmentFromSomethingElse(key);
1571 }
1572 }
1573 }
1574
AssignmentFromParameter(Handle<String> name,int index)1575 void AssignmentFromParameter(Handle<String> name, int index) {
1576 EnsureAllocation();
1577 names_->Add(name);
1578 assigned_arguments_->Add(index);
1579 assigned_constants_->Add(Factory::undefined_value());
1580 }
1581
AssignmentFromConstant(Handle<String> name,Handle<Object> value)1582 void AssignmentFromConstant(Handle<String> name, Handle<Object> value) {
1583 EnsureAllocation();
1584 names_->Add(name);
1585 assigned_arguments_->Add(-1);
1586 assigned_constants_->Add(value);
1587 }
1588
AssignmentFromSomethingElse(Handle<String> name)1589 void AssignmentFromSomethingElse(Handle<String> name) {
1590 EnsureAllocation();
1591 names_->Add(name);
1592 assigned_arguments_->Add(-1);
1593 assigned_constants_->Add(Factory::undefined_value());
1594
1595 // The this assignment is not a simple one.
1596 only_simple_this_property_assignments_ = false;
1597 }
1598
EnsureAllocation()1599 void EnsureAllocation() {
1600 if (names_ == NULL) {
1601 ASSERT(assigned_arguments_ == NULL);
1602 ASSERT(assigned_constants_ == NULL);
1603 names_ = new ZoneStringList(4);
1604 assigned_arguments_ = new ZoneList<int>(4);
1605 assigned_constants_ = new ZoneObjectList(4);
1606 }
1607 }
1608
1609 bool only_this_property_assignments_;
1610 bool only_simple_this_property_assignments_;
1611 ZoneStringList* names_;
1612 ZoneList<int>* assigned_arguments_;
1613 ZoneObjectList* assigned_constants_;
1614 };
1615
1616
ParseSourceElements(ZoneListWrapper<Statement> * processor,int end_token,bool * ok)1617 void* Parser::ParseSourceElements(ZoneListWrapper<Statement>* processor,
1618 int end_token,
1619 bool* ok) {
1620 // SourceElements ::
1621 // (Statement)* <end_token>
1622
1623 // Allocate a target stack to use for this set of source
1624 // elements. This way, all scripts and functions get their own
1625 // target stack thus avoiding illegal breaks and continues across
1626 // functions.
1627 TargetScope scope(this);
1628
1629 ASSERT(processor != NULL);
1630 InitializationBlockFinder block_finder;
1631 ThisNamedPropertyAssigmentFinder this_property_assignment_finder;
1632 while (peek() != end_token) {
1633 Statement* stat = ParseStatement(NULL, CHECK_OK);
1634 if (stat == NULL || stat->IsEmpty()) continue;
1635 // We find and mark the initialization blocks on top level code only.
1636 // This is because the optimization prevents reuse of the map transitions,
1637 // so it should be used only for code that will only be run once.
1638 if (top_scope_->is_global_scope()) {
1639 block_finder.Update(stat);
1640 }
1641 // Find and mark all assignments to named properties in this (this.x =)
1642 if (top_scope_->is_function_scope()) {
1643 this_property_assignment_finder.Update(top_scope_, stat);
1644 }
1645 processor->Add(stat);
1646 }
1647
1648 // Propagate the collected information on this property assignments.
1649 if (top_scope_->is_function_scope()) {
1650 if (this_property_assignment_finder.only_this_property_assignments()) {
1651 temp_scope_->SetThisPropertyAssignmentInfo(
1652 this_property_assignment_finder.only_this_property_assignments(),
1653 this_property_assignment_finder.
1654 only_simple_this_property_assignments(),
1655 this_property_assignment_finder.GetThisPropertyAssignments());
1656 }
1657 }
1658 return 0;
1659 }
1660
1661
ParseStatement(ZoneStringList * labels,bool * ok)1662 Statement* Parser::ParseStatement(ZoneStringList* labels, bool* ok) {
1663 // Statement ::
1664 // Block
1665 // VariableStatement
1666 // EmptyStatement
1667 // ExpressionStatement
1668 // IfStatement
1669 // IterationStatement
1670 // ContinueStatement
1671 // BreakStatement
1672 // ReturnStatement
1673 // WithStatement
1674 // LabelledStatement
1675 // SwitchStatement
1676 // ThrowStatement
1677 // TryStatement
1678 // DebuggerStatement
1679
1680 // Note: Since labels can only be used by 'break' and 'continue'
1681 // statements, which themselves are only valid within blocks,
1682 // iterations or 'switch' statements (i.e., BreakableStatements),
1683 // labels can be simply ignored in all other cases; except for
1684 // trivial labeled break statements 'label: break label' which is
1685 // parsed into an empty statement.
1686
1687 // Keep the source position of the statement
1688 int statement_pos = scanner().peek_location().beg_pos;
1689 Statement* stmt = NULL;
1690 switch (peek()) {
1691 case Token::LBRACE:
1692 return ParseBlock(labels, ok);
1693
1694 case Token::CONST: // fall through
1695 case Token::VAR:
1696 stmt = ParseVariableStatement(ok);
1697 break;
1698
1699 case Token::SEMICOLON:
1700 Next();
1701 return factory()->EmptyStatement();
1702
1703 case Token::IF:
1704 stmt = ParseIfStatement(labels, ok);
1705 break;
1706
1707 case Token::DO:
1708 stmt = ParseDoStatement(labels, ok);
1709 break;
1710
1711 case Token::WHILE:
1712 stmt = ParseWhileStatement(labels, ok);
1713 break;
1714
1715 case Token::FOR:
1716 stmt = ParseForStatement(labels, ok);
1717 break;
1718
1719 case Token::CONTINUE:
1720 stmt = ParseContinueStatement(ok);
1721 break;
1722
1723 case Token::BREAK:
1724 stmt = ParseBreakStatement(labels, ok);
1725 break;
1726
1727 case Token::RETURN:
1728 stmt = ParseReturnStatement(ok);
1729 break;
1730
1731 case Token::WITH:
1732 stmt = ParseWithStatement(labels, ok);
1733 break;
1734
1735 case Token::SWITCH:
1736 stmt = ParseSwitchStatement(labels, ok);
1737 break;
1738
1739 case Token::THROW:
1740 stmt = ParseThrowStatement(ok);
1741 break;
1742
1743 case Token::TRY: {
1744 // NOTE: It is somewhat complicated to have labels on
1745 // try-statements. When breaking out of a try-finally statement,
1746 // one must take great care not to treat it as a
1747 // fall-through. It is much easier just to wrap the entire
1748 // try-statement in a statement block and put the labels there
1749 Block* result = NEW(Block(labels, 1, false));
1750 Target target(this, result);
1751 TryStatement* statement = ParseTryStatement(CHECK_OK);
1752 if (statement) {
1753 statement->set_statement_pos(statement_pos);
1754 }
1755 if (result) result->AddStatement(statement);
1756 return result;
1757 }
1758
1759 case Token::FUNCTION:
1760 return ParseFunctionDeclaration(ok);
1761
1762 case Token::NATIVE:
1763 return ParseNativeDeclaration(ok);
1764
1765 case Token::DEBUGGER:
1766 stmt = ParseDebuggerStatement(ok);
1767 break;
1768
1769 default:
1770 stmt = ParseExpressionOrLabelledStatement(labels, ok);
1771 }
1772
1773 // Store the source position of the statement
1774 if (stmt != NULL) stmt->set_statement_pos(statement_pos);
1775 return stmt;
1776 }
1777
1778
Declare(Handle<String> name,Variable::Mode mode,FunctionLiteral * fun,bool resolve,bool * ok)1779 VariableProxy* AstBuildingParser::Declare(Handle<String> name,
1780 Variable::Mode mode,
1781 FunctionLiteral* fun,
1782 bool resolve,
1783 bool* ok) {
1784 Variable* var = NULL;
1785 // If we are inside a function, a declaration of a variable
1786 // is a truly local variable, and the scope of the variable
1787 // is always the function scope.
1788
1789 // If a function scope exists, then we can statically declare this
1790 // variable and also set its mode. In any case, a Declaration node
1791 // will be added to the scope so that the declaration can be added
1792 // to the corresponding activation frame at runtime if necessary.
1793 // For instance declarations inside an eval scope need to be added
1794 // to the calling function context.
1795 if (top_scope_->is_function_scope()) {
1796 // Declare the variable in the function scope.
1797 var = top_scope_->LocalLookup(name);
1798 if (var == NULL) {
1799 // Declare the name.
1800 var = top_scope_->DeclareLocal(name, mode);
1801 } else {
1802 // The name was declared before; check for conflicting
1803 // re-declarations. If the previous declaration was a const or the
1804 // current declaration is a const then we have a conflict. There is
1805 // similar code in runtime.cc in the Declare functions.
1806 if ((mode == Variable::CONST) || (var->mode() == Variable::CONST)) {
1807 // We only have vars and consts in declarations.
1808 ASSERT(var->mode() == Variable::VAR ||
1809 var->mode() == Variable::CONST);
1810 const char* type = (var->mode() == Variable::VAR) ? "var" : "const";
1811 Handle<String> type_string =
1812 Factory::NewStringFromUtf8(CStrVector(type), TENURED);
1813 Expression* expression =
1814 NewThrowTypeError(Factory::redeclaration_symbol(),
1815 type_string, name);
1816 top_scope_->SetIllegalRedeclaration(expression);
1817 }
1818 }
1819 }
1820
1821 // We add a declaration node for every declaration. The compiler
1822 // will only generate code if necessary. In particular, declarations
1823 // for inner local variables that do not represent functions won't
1824 // result in any generated code.
1825 //
1826 // Note that we always add an unresolved proxy even if it's not
1827 // used, simply because we don't know in this method (w/o extra
1828 // parameters) if the proxy is needed or not. The proxy will be
1829 // bound during variable resolution time unless it was pre-bound
1830 // below.
1831 //
1832 // WARNING: This will lead to multiple declaration nodes for the
1833 // same variable if it is declared several times. This is not a
1834 // semantic issue as long as we keep the source order, but it may be
1835 // a performance issue since it may lead to repeated
1836 // Runtime::DeclareContextSlot() calls.
1837 VariableProxy* proxy = top_scope_->NewUnresolved(name, inside_with());
1838 top_scope_->AddDeclaration(NEW(Declaration(proxy, mode, fun)));
1839
1840 // For global const variables we bind the proxy to a variable.
1841 if (mode == Variable::CONST && top_scope_->is_global_scope()) {
1842 ASSERT(resolve); // should be set by all callers
1843 Variable::Kind kind = Variable::NORMAL;
1844 var = NEW(Variable(top_scope_, name, Variable::CONST, true, kind));
1845 }
1846
1847 // If requested and we have a local variable, bind the proxy to the variable
1848 // at parse-time. This is used for functions (and consts) declared inside
1849 // statements: the corresponding function (or const) variable must be in the
1850 // function scope and not a statement-local scope, e.g. as provided with a
1851 // 'with' statement:
1852 //
1853 // with (obj) {
1854 // function f() {}
1855 // }
1856 //
1857 // which is translated into:
1858 //
1859 // with (obj) {
1860 // // in this case this is not: 'var f; f = function () {};'
1861 // var f = function () {};
1862 // }
1863 //
1864 // Note that if 'f' is accessed from inside the 'with' statement, it
1865 // will be allocated in the context (because we must be able to look
1866 // it up dynamically) but it will also be accessed statically, i.e.,
1867 // with a context slot index and a context chain length for this
1868 // initialization code. Thus, inside the 'with' statement, we need
1869 // both access to the static and the dynamic context chain; the
1870 // runtime needs to provide both.
1871 if (resolve && var != NULL) proxy->BindTo(var);
1872
1873 return proxy;
1874 }
1875
1876
1877 // Language extension which is only enabled for source files loaded
1878 // through the API's extension mechanism. A native function
1879 // declaration is resolved by looking up the function through a
1880 // callback provided by the extension.
ParseNativeDeclaration(bool * ok)1881 Statement* Parser::ParseNativeDeclaration(bool* ok) {
1882 if (extension_ == NULL) {
1883 ReportUnexpectedToken(Token::NATIVE);
1884 *ok = false;
1885 return NULL;
1886 }
1887
1888 Expect(Token::NATIVE, CHECK_OK);
1889 Expect(Token::FUNCTION, CHECK_OK);
1890 Handle<String> name = ParseIdentifier(CHECK_OK);
1891 Expect(Token::LPAREN, CHECK_OK);
1892 bool done = (peek() == Token::RPAREN);
1893 while (!done) {
1894 ParseIdentifier(CHECK_OK);
1895 done = (peek() == Token::RPAREN);
1896 if (!done) Expect(Token::COMMA, CHECK_OK);
1897 }
1898 Expect(Token::RPAREN, CHECK_OK);
1899 Expect(Token::SEMICOLON, CHECK_OK);
1900
1901 if (is_pre_parsing_) return NULL;
1902
1903 // Make sure that the function containing the native declaration
1904 // isn't lazily compiled. The extension structures are only
1905 // accessible while parsing the first time not when reparsing
1906 // because of lazy compilation.
1907 top_scope_->ForceEagerCompilation();
1908
1909 // Compute the function template for the native function.
1910 v8::Handle<v8::FunctionTemplate> fun_template =
1911 extension_->GetNativeFunction(v8::Utils::ToLocal(name));
1912 ASSERT(!fun_template.IsEmpty());
1913
1914 // Instantiate the function and create a boilerplate function from it.
1915 Handle<JSFunction> fun = Utils::OpenHandle(*fun_template->GetFunction());
1916 const int literals = fun->NumberOfLiterals();
1917 Handle<Code> code = Handle<Code>(fun->shared()->code());
1918 Handle<JSFunction> boilerplate =
1919 Factory::NewFunctionBoilerplate(name, literals, false, code);
1920
1921 // Copy the function data to the boilerplate. Used by
1922 // builtins.cc:HandleApiCall to perform argument type checks and to
1923 // find the right native code to call.
1924 boilerplate->shared()->set_function_data(fun->shared()->function_data());
1925 int parameters = fun->shared()->formal_parameter_count();
1926 boilerplate->shared()->set_formal_parameter_count(parameters);
1927
1928 // TODO(1240846): It's weird that native function declarations are
1929 // introduced dynamically when we meet their declarations, whereas
1930 // other functions are setup when entering the surrounding scope.
1931 FunctionBoilerplateLiteral* lit =
1932 NEW(FunctionBoilerplateLiteral(boilerplate));
1933 VariableProxy* var = Declare(name, Variable::VAR, NULL, true, CHECK_OK);
1934 return NEW(ExpressionStatement(
1935 new Assignment(Token::INIT_VAR, var, lit, RelocInfo::kNoPosition)));
1936 }
1937
1938
ParseFunctionDeclaration(bool * ok)1939 Statement* Parser::ParseFunctionDeclaration(bool* ok) {
1940 // Parse a function literal. We may or may not have a function name.
1941 // If we have a name we use it as the variable name for the function
1942 // (a function declaration) and not as the function name of a function
1943 // expression.
1944
1945 Expect(Token::FUNCTION, CHECK_OK);
1946 int function_token_position = scanner().location().beg_pos;
1947
1948 Handle<String> name;
1949 if (peek() == Token::IDENTIFIER) name = ParseIdentifier(CHECK_OK);
1950 FunctionLiteral* fun = ParseFunctionLiteral(name, function_token_position,
1951 DECLARATION, CHECK_OK);
1952
1953 if (name.is_null()) {
1954 // We don't have a name - it is always an anonymous function
1955 // expression.
1956 return NEW(ExpressionStatement(fun));
1957 } else {
1958 // We have a name so even if we're not at the top-level of the
1959 // global or a function scope, we treat is as such and introduce
1960 // the function with it's initial value upon entering the
1961 // corresponding scope.
1962 Declare(name, Variable::VAR, fun, true, CHECK_OK);
1963 return factory()->EmptyStatement();
1964 }
1965 }
1966
1967
ParseBlock(ZoneStringList * labels,bool * ok)1968 Block* Parser::ParseBlock(ZoneStringList* labels, bool* ok) {
1969 // Block ::
1970 // '{' Statement* '}'
1971
1972 // Note that a Block does not introduce a new execution scope!
1973 // (ECMA-262, 3rd, 12.2)
1974 //
1975 // Construct block expecting 16 statements.
1976 Block* result = NEW(Block(labels, 16, false));
1977 Target target(this, result);
1978 Expect(Token::LBRACE, CHECK_OK);
1979 while (peek() != Token::RBRACE) {
1980 Statement* stat = ParseStatement(NULL, CHECK_OK);
1981 if (stat && !stat->IsEmpty()) result->AddStatement(stat);
1982 }
1983 Expect(Token::RBRACE, CHECK_OK);
1984 return result;
1985 }
1986
1987
ParseVariableStatement(bool * ok)1988 Block* Parser::ParseVariableStatement(bool* ok) {
1989 // VariableStatement ::
1990 // VariableDeclarations ';'
1991
1992 Expression* dummy; // to satisfy the ParseVariableDeclarations() signature
1993 Block* result = ParseVariableDeclarations(true, &dummy, CHECK_OK);
1994 ExpectSemicolon(CHECK_OK);
1995 return result;
1996 }
1997
1998
1999 // If the variable declaration declares exactly one non-const
2000 // variable, then *var is set to that variable. In all other cases,
2001 // *var is untouched; in particular, it is the caller's responsibility
2002 // to initialize it properly. This mechanism is used for the parsing
2003 // of 'for-in' loops.
ParseVariableDeclarations(bool accept_IN,Expression ** var,bool * ok)2004 Block* Parser::ParseVariableDeclarations(bool accept_IN,
2005 Expression** var,
2006 bool* ok) {
2007 // VariableDeclarations ::
2008 // ('var' | 'const') (Identifier ('=' AssignmentExpression)?)+[',']
2009
2010 Variable::Mode mode = Variable::VAR;
2011 bool is_const = false;
2012 if (peek() == Token::VAR) {
2013 Consume(Token::VAR);
2014 } else if (peek() == Token::CONST) {
2015 Consume(Token::CONST);
2016 mode = Variable::CONST;
2017 is_const = true;
2018 } else {
2019 UNREACHABLE(); // by current callers
2020 }
2021
2022 // The scope of a variable/const declared anywhere inside a function
2023 // is the entire function (ECMA-262, 3rd, 10.1.3, and 12.2). Thus we can
2024 // transform a source-level variable/const declaration into a (Function)
2025 // Scope declaration, and rewrite the source-level initialization into an
2026 // assignment statement. We use a block to collect multiple assignments.
2027 //
2028 // We mark the block as initializer block because we don't want the
2029 // rewriter to add a '.result' assignment to such a block (to get compliant
2030 // behavior for code such as print(eval('var x = 7')), and for cosmetic
2031 // reasons when pretty-printing. Also, unless an assignment (initialization)
2032 // is inside an initializer block, it is ignored.
2033 //
2034 // Create new block with one expected declaration.
2035 Block* block = NEW(Block(NULL, 1, true));
2036 VariableProxy* last_var = NULL; // the last variable declared
2037 int nvars = 0; // the number of variables declared
2038 do {
2039 // Parse variable name.
2040 if (nvars > 0) Consume(Token::COMMA);
2041 Handle<String> name = ParseIdentifier(CHECK_OK);
2042
2043 // Declare variable.
2044 // Note that we *always* must treat the initial value via a separate init
2045 // assignment for variables and constants because the value must be assigned
2046 // when the variable is encountered in the source. But the variable/constant
2047 // is declared (and set to 'undefined') upon entering the function within
2048 // which the variable or constant is declared. Only function variables have
2049 // an initial value in the declaration (because they are initialized upon
2050 // entering the function).
2051 //
2052 // If we have a const declaration, in an inner scope, the proxy is always
2053 // bound to the declared variable (independent of possibly surrounding with
2054 // statements).
2055 last_var = Declare(name, mode, NULL,
2056 is_const /* always bound for CONST! */,
2057 CHECK_OK);
2058 nvars++;
2059
2060 // Parse initialization expression if present and/or needed. A
2061 // declaration of the form:
2062 //
2063 // var v = x;
2064 //
2065 // is syntactic sugar for:
2066 //
2067 // var v; v = x;
2068 //
2069 // In particular, we need to re-lookup 'v' as it may be a
2070 // different 'v' than the 'v' in the declaration (if we are inside
2071 // a 'with' statement that makes a object property with name 'v'
2072 // visible).
2073 //
2074 // However, note that const declarations are different! A const
2075 // declaration of the form:
2076 //
2077 // const c = x;
2078 //
2079 // is *not* syntactic sugar for:
2080 //
2081 // const c; c = x;
2082 //
2083 // The "variable" c initialized to x is the same as the declared
2084 // one - there is no re-lookup (see the last parameter of the
2085 // Declare() call above).
2086
2087 Expression* value = NULL;
2088 int position = -1;
2089 if (peek() == Token::ASSIGN) {
2090 Expect(Token::ASSIGN, CHECK_OK);
2091 position = scanner().location().beg_pos;
2092 value = ParseAssignmentExpression(accept_IN, CHECK_OK);
2093 }
2094
2095 // Make sure that 'const c' actually initializes 'c' to undefined
2096 // even though it seems like a stupid thing to do.
2097 if (value == NULL && is_const) {
2098 value = GetLiteralUndefined();
2099 }
2100
2101 // Global variable declarations must be compiled in a specific
2102 // way. When the script containing the global variable declaration
2103 // is entered, the global variable must be declared, so that if it
2104 // doesn't exist (not even in a prototype of the global object) it
2105 // gets created with an initial undefined value. This is handled
2106 // by the declarations part of the function representing the
2107 // top-level global code; see Runtime::DeclareGlobalVariable. If
2108 // it already exists (in the object or in a prototype), it is
2109 // *not* touched until the variable declaration statement is
2110 // executed.
2111 //
2112 // Executing the variable declaration statement will always
2113 // guarantee to give the global object a "local" variable; a
2114 // variable defined in the global object and not in any
2115 // prototype. This way, global variable declarations can shadow
2116 // properties in the prototype chain, but only after the variable
2117 // declaration statement has been executed. This is important in
2118 // browsers where the global object (window) has lots of
2119 // properties defined in prototype objects.
2120
2121 if (!is_pre_parsing_ && top_scope_->is_global_scope()) {
2122 // Compute the arguments for the runtime call.
2123 ZoneList<Expression*>* arguments = new ZoneList<Expression*>(2);
2124 // Be careful not to assign a value to the global variable if
2125 // we're in a with. The initialization value should not
2126 // necessarily be stored in the global object in that case,
2127 // which is why we need to generate a separate assignment node.
2128 arguments->Add(NEW(Literal(name))); // we have at least 1 parameter
2129 if (is_const || (value != NULL && !inside_with())) {
2130 arguments->Add(value);
2131 value = NULL; // zap the value to avoid the unnecessary assignment
2132 }
2133 // Construct the call to Runtime::DeclareGlobal{Variable,Const}Locally
2134 // and add it to the initialization statement block. Note that
2135 // this function does different things depending on if we have
2136 // 1 or 2 parameters.
2137 CallRuntime* initialize;
2138 if (is_const) {
2139 initialize =
2140 NEW(CallRuntime(
2141 Factory::InitializeConstGlobal_symbol(),
2142 Runtime::FunctionForId(Runtime::kInitializeConstGlobal),
2143 arguments));
2144 } else {
2145 initialize =
2146 NEW(CallRuntime(
2147 Factory::InitializeVarGlobal_symbol(),
2148 Runtime::FunctionForId(Runtime::kInitializeVarGlobal),
2149 arguments));
2150 }
2151 block->AddStatement(NEW(ExpressionStatement(initialize)));
2152 }
2153
2154 // Add an assignment node to the initialization statement block if
2155 // we still have a pending initialization value. We must distinguish
2156 // between variables and constants: Variable initializations are simply
2157 // assignments (with all the consequences if they are inside a 'with'
2158 // statement - they may change a 'with' object property). Constant
2159 // initializations always assign to the declared constant which is
2160 // always at the function scope level. This is only relevant for
2161 // dynamically looked-up variables and constants (the start context
2162 // for constant lookups is always the function context, while it is
2163 // the top context for variables). Sigh...
2164 if (value != NULL) {
2165 Token::Value op = (is_const ? Token::INIT_CONST : Token::INIT_VAR);
2166 Assignment* assignment = NEW(Assignment(op, last_var, value, position));
2167 if (block) block->AddStatement(NEW(ExpressionStatement(assignment)));
2168 }
2169 } while (peek() == Token::COMMA);
2170
2171 if (!is_const && nvars == 1) {
2172 // We have a single, non-const variable.
2173 if (is_pre_parsing_) {
2174 // If we're preparsing then we need to set the var to something
2175 // in order for for-in loops to parse correctly.
2176 *var = ValidLeftHandSideSentinel::instance();
2177 } else {
2178 ASSERT(last_var != NULL);
2179 *var = last_var;
2180 }
2181 }
2182
2183 return block;
2184 }
2185
2186
ContainsLabel(ZoneStringList * labels,Handle<String> label)2187 static bool ContainsLabel(ZoneStringList* labels, Handle<String> label) {
2188 ASSERT(!label.is_null());
2189 if (labels != NULL)
2190 for (int i = labels->length(); i-- > 0; )
2191 if (labels->at(i).is_identical_to(label))
2192 return true;
2193
2194 return false;
2195 }
2196
2197
ParseExpressionOrLabelledStatement(ZoneStringList * labels,bool * ok)2198 Statement* Parser::ParseExpressionOrLabelledStatement(ZoneStringList* labels,
2199 bool* ok) {
2200 // ExpressionStatement | LabelledStatement ::
2201 // Expression ';'
2202 // Identifier ':' Statement
2203
2204 Expression* expr = ParseExpression(true, CHECK_OK);
2205 if (peek() == Token::COLON && expr &&
2206 expr->AsVariableProxy() != NULL &&
2207 !expr->AsVariableProxy()->is_this()) {
2208 VariableProxy* var = expr->AsVariableProxy();
2209 Handle<String> label = var->name();
2210 // TODO(1240780): We don't check for redeclaration of labels
2211 // during preparsing since keeping track of the set of active
2212 // labels requires nontrivial changes to the way scopes are
2213 // structured. However, these are probably changes we want to
2214 // make later anyway so we should go back and fix this then.
2215 if (!is_pre_parsing_) {
2216 if (ContainsLabel(labels, label) || TargetStackContainsLabel(label)) {
2217 SmartPointer<char> c_string = label->ToCString(DISALLOW_NULLS);
2218 const char* elms[2] = { "Label", *c_string };
2219 Vector<const char*> args(elms, 2);
2220 ReportMessage("redeclaration", args);
2221 *ok = false;
2222 return NULL;
2223 }
2224 if (labels == NULL) labels = new ZoneStringList(4);
2225 labels->Add(label);
2226 // Remove the "ghost" variable that turned out to be a label
2227 // from the top scope. This way, we don't try to resolve it
2228 // during the scope processing.
2229 top_scope_->RemoveUnresolved(var);
2230 }
2231 Expect(Token::COLON, CHECK_OK);
2232 return ParseStatement(labels, ok);
2233 }
2234
2235 // Parsed expression statement.
2236 ExpectSemicolon(CHECK_OK);
2237 return NEW(ExpressionStatement(expr));
2238 }
2239
2240
ParseIfStatement(ZoneStringList * labels,bool * ok)2241 IfStatement* Parser::ParseIfStatement(ZoneStringList* labels, bool* ok) {
2242 // IfStatement ::
2243 // 'if' '(' Expression ')' Statement ('else' Statement)?
2244
2245 Expect(Token::IF, CHECK_OK);
2246 Expect(Token::LPAREN, CHECK_OK);
2247 Expression* condition = ParseExpression(true, CHECK_OK);
2248 Expect(Token::RPAREN, CHECK_OK);
2249 Statement* then_statement = ParseStatement(labels, CHECK_OK);
2250 Statement* else_statement = NULL;
2251 if (peek() == Token::ELSE) {
2252 Next();
2253 else_statement = ParseStatement(labels, CHECK_OK);
2254 } else if (!is_pre_parsing_) {
2255 else_statement = factory()->EmptyStatement();
2256 }
2257 return NEW(IfStatement(condition, then_statement, else_statement));
2258 }
2259
2260
ParseContinueStatement(bool * ok)2261 Statement* Parser::ParseContinueStatement(bool* ok) {
2262 // ContinueStatement ::
2263 // 'continue' Identifier? ';'
2264
2265 Expect(Token::CONTINUE, CHECK_OK);
2266 Handle<String> label = Handle<String>::null();
2267 Token::Value tok = peek();
2268 if (!scanner_.has_line_terminator_before_next() &&
2269 tok != Token::SEMICOLON && tok != Token::RBRACE && tok != Token::EOS) {
2270 label = ParseIdentifier(CHECK_OK);
2271 }
2272 IterationStatement* target = NULL;
2273 if (!is_pre_parsing_) {
2274 target = LookupContinueTarget(label, CHECK_OK);
2275 if (target == NULL) {
2276 // Illegal continue statement. To be consistent with KJS we delay
2277 // reporting of the syntax error until runtime.
2278 Handle<String> error_type = Factory::illegal_continue_symbol();
2279 if (!label.is_null()) error_type = Factory::unknown_label_symbol();
2280 Expression* throw_error = NewThrowSyntaxError(error_type, label);
2281 return NEW(ExpressionStatement(throw_error));
2282 }
2283 }
2284 ExpectSemicolon(CHECK_OK);
2285 return NEW(ContinueStatement(target));
2286 }
2287
2288
ParseBreakStatement(ZoneStringList * labels,bool * ok)2289 Statement* Parser::ParseBreakStatement(ZoneStringList* labels, bool* ok) {
2290 // BreakStatement ::
2291 // 'break' Identifier? ';'
2292
2293 Expect(Token::BREAK, CHECK_OK);
2294 Handle<String> label;
2295 Token::Value tok = peek();
2296 if (!scanner_.has_line_terminator_before_next() &&
2297 tok != Token::SEMICOLON && tok != Token::RBRACE && tok != Token::EOS) {
2298 label = ParseIdentifier(CHECK_OK);
2299 }
2300 // Parse labeled break statements that target themselves into
2301 // empty statements, e.g. 'l1: l2: l3: break l2;'
2302 if (!label.is_null() && ContainsLabel(labels, label)) {
2303 return factory()->EmptyStatement();
2304 }
2305 BreakableStatement* target = NULL;
2306 if (!is_pre_parsing_) {
2307 target = LookupBreakTarget(label, CHECK_OK);
2308 if (target == NULL) {
2309 // Illegal break statement. To be consistent with KJS we delay
2310 // reporting of the syntax error until runtime.
2311 Handle<String> error_type = Factory::illegal_break_symbol();
2312 if (!label.is_null()) error_type = Factory::unknown_label_symbol();
2313 Expression* throw_error = NewThrowSyntaxError(error_type, label);
2314 return NEW(ExpressionStatement(throw_error));
2315 }
2316 }
2317 ExpectSemicolon(CHECK_OK);
2318 return NEW(BreakStatement(target));
2319 }
2320
2321
ParseReturnStatement(bool * ok)2322 Statement* Parser::ParseReturnStatement(bool* ok) {
2323 // ReturnStatement ::
2324 // 'return' Expression? ';'
2325
2326 // Consume the return token. It is necessary to do the before
2327 // reporting any errors on it, because of the way errors are
2328 // reported (underlining).
2329 Expect(Token::RETURN, CHECK_OK);
2330
2331 // An ECMAScript program is considered syntactically incorrect if it
2332 // contains a return statement that is not within the body of a
2333 // function. See ECMA-262, section 12.9, page 67.
2334 //
2335 // To be consistent with KJS we report the syntax error at runtime.
2336 if (!is_pre_parsing_ && !top_scope_->is_function_scope()) {
2337 Handle<String> type = Factory::illegal_return_symbol();
2338 Expression* throw_error = NewThrowSyntaxError(type, Handle<Object>::null());
2339 return NEW(ExpressionStatement(throw_error));
2340 }
2341
2342 Token::Value tok = peek();
2343 if (scanner_.has_line_terminator_before_next() ||
2344 tok == Token::SEMICOLON ||
2345 tok == Token::RBRACE ||
2346 tok == Token::EOS) {
2347 ExpectSemicolon(CHECK_OK);
2348 return NEW(ReturnStatement(GetLiteralUndefined()));
2349 }
2350
2351 Expression* expr = ParseExpression(true, CHECK_OK);
2352 ExpectSemicolon(CHECK_OK);
2353 return NEW(ReturnStatement(expr));
2354 }
2355
2356
WithHelper(Expression * obj,ZoneStringList * labels,bool is_catch_block,bool * ok)2357 Block* Parser::WithHelper(Expression* obj,
2358 ZoneStringList* labels,
2359 bool is_catch_block,
2360 bool* ok) {
2361 // Parse the statement and collect escaping labels.
2362 ZoneList<BreakTarget*>* target_list = NEW(ZoneList<BreakTarget*>(0));
2363 TargetCollector collector(target_list);
2364 Statement* stat;
2365 { Target target(this, &collector);
2366 with_nesting_level_++;
2367 top_scope_->RecordWithStatement();
2368 stat = ParseStatement(labels, CHECK_OK);
2369 with_nesting_level_--;
2370 }
2371 // Create resulting block with two statements.
2372 // 1: Evaluate the with expression.
2373 // 2: The try-finally block evaluating the body.
2374 Block* result = NEW(Block(NULL, 2, false));
2375
2376 if (result != NULL) {
2377 result->AddStatement(NEW(WithEnterStatement(obj, is_catch_block)));
2378
2379 // Create body block.
2380 Block* body = NEW(Block(NULL, 1, false));
2381 body->AddStatement(stat);
2382
2383 // Create exit block.
2384 Block* exit = NEW(Block(NULL, 1, false));
2385 exit->AddStatement(NEW(WithExitStatement()));
2386
2387 // Return a try-finally statement.
2388 TryFinally* wrapper = NEW(TryFinally(body, exit));
2389 wrapper->set_escaping_targets(collector.targets());
2390 result->AddStatement(wrapper);
2391 }
2392 return result;
2393 }
2394
2395
ParseWithStatement(ZoneStringList * labels,bool * ok)2396 Statement* Parser::ParseWithStatement(ZoneStringList* labels, bool* ok) {
2397 // WithStatement ::
2398 // 'with' '(' Expression ')' Statement
2399
2400 Expect(Token::WITH, CHECK_OK);
2401 Expect(Token::LPAREN, CHECK_OK);
2402 Expression* expr = ParseExpression(true, CHECK_OK);
2403 Expect(Token::RPAREN, CHECK_OK);
2404
2405 return WithHelper(expr, labels, false, CHECK_OK);
2406 }
2407
2408
ParseCaseClause(bool * default_seen_ptr,bool * ok)2409 CaseClause* Parser::ParseCaseClause(bool* default_seen_ptr, bool* ok) {
2410 // CaseClause ::
2411 // 'case' Expression ':' Statement*
2412 // 'default' ':' Statement*
2413
2414 Expression* label = NULL; // NULL expression indicates default case
2415 if (peek() == Token::CASE) {
2416 Expect(Token::CASE, CHECK_OK);
2417 label = ParseExpression(true, CHECK_OK);
2418 } else {
2419 Expect(Token::DEFAULT, CHECK_OK);
2420 if (*default_seen_ptr) {
2421 ReportMessage("multiple_defaults_in_switch",
2422 Vector<const char*>::empty());
2423 *ok = false;
2424 return NULL;
2425 }
2426 *default_seen_ptr = true;
2427 }
2428 Expect(Token::COLON, CHECK_OK);
2429
2430 ZoneListWrapper<Statement> statements = factory()->NewList<Statement>(5);
2431 while (peek() != Token::CASE &&
2432 peek() != Token::DEFAULT &&
2433 peek() != Token::RBRACE) {
2434 Statement* stat = ParseStatement(NULL, CHECK_OK);
2435 statements.Add(stat);
2436 }
2437
2438 return NEW(CaseClause(label, statements.elements()));
2439 }
2440
2441
ParseSwitchStatement(ZoneStringList * labels,bool * ok)2442 SwitchStatement* Parser::ParseSwitchStatement(ZoneStringList* labels,
2443 bool* ok) {
2444 // SwitchStatement ::
2445 // 'switch' '(' Expression ')' '{' CaseClause* '}'
2446
2447 SwitchStatement* statement = NEW(SwitchStatement(labels));
2448 Target target(this, statement);
2449
2450 Expect(Token::SWITCH, CHECK_OK);
2451 Expect(Token::LPAREN, CHECK_OK);
2452 Expression* tag = ParseExpression(true, CHECK_OK);
2453 Expect(Token::RPAREN, CHECK_OK);
2454
2455 bool default_seen = false;
2456 ZoneListWrapper<CaseClause> cases = factory()->NewList<CaseClause>(4);
2457 Expect(Token::LBRACE, CHECK_OK);
2458 while (peek() != Token::RBRACE) {
2459 CaseClause* clause = ParseCaseClause(&default_seen, CHECK_OK);
2460 cases.Add(clause);
2461 }
2462 Expect(Token::RBRACE, CHECK_OK);
2463
2464 if (statement) statement->Initialize(tag, cases.elements());
2465 return statement;
2466 }
2467
2468
ParseThrowStatement(bool * ok)2469 Statement* Parser::ParseThrowStatement(bool* ok) {
2470 // ThrowStatement ::
2471 // 'throw' Expression ';'
2472
2473 Expect(Token::THROW, CHECK_OK);
2474 int pos = scanner().location().beg_pos;
2475 if (scanner_.has_line_terminator_before_next()) {
2476 ReportMessage("newline_after_throw", Vector<const char*>::empty());
2477 *ok = false;
2478 return NULL;
2479 }
2480 Expression* exception = ParseExpression(true, CHECK_OK);
2481 ExpectSemicolon(CHECK_OK);
2482
2483 return NEW(ExpressionStatement(new Throw(exception, pos)));
2484 }
2485
2486
ParseTryStatement(bool * ok)2487 TryStatement* Parser::ParseTryStatement(bool* ok) {
2488 // TryStatement ::
2489 // 'try' Block Catch
2490 // 'try' Block Finally
2491 // 'try' Block Catch Finally
2492 //
2493 // Catch ::
2494 // 'catch' '(' Identifier ')' Block
2495 //
2496 // Finally ::
2497 // 'finally' Block
2498
2499 Expect(Token::TRY, CHECK_OK);
2500
2501 ZoneList<BreakTarget*>* target_list = NEW(ZoneList<BreakTarget*>(0));
2502 TargetCollector collector(target_list);
2503 Block* try_block;
2504
2505 { Target target(this, &collector);
2506 try_block = ParseBlock(NULL, CHECK_OK);
2507 }
2508
2509 Block* catch_block = NULL;
2510 VariableProxy* catch_var = NULL;
2511 Block* finally_block = NULL;
2512
2513 Token::Value tok = peek();
2514 if (tok != Token::CATCH && tok != Token::FINALLY) {
2515 ReportMessage("no_catch_or_finally", Vector<const char*>::empty());
2516 *ok = false;
2517 return NULL;
2518 }
2519
2520 // If we can break out from the catch block and there is a finally block,
2521 // then we will need to collect jump targets from the catch block. Since
2522 // we don't know yet if there will be a finally block, we always collect
2523 // the jump targets.
2524 ZoneList<BreakTarget*>* catch_target_list = NEW(ZoneList<BreakTarget*>(0));
2525 TargetCollector catch_collector(catch_target_list);
2526 bool has_catch = false;
2527 if (tok == Token::CATCH) {
2528 has_catch = true;
2529 Consume(Token::CATCH);
2530
2531 Expect(Token::LPAREN, CHECK_OK);
2532 Handle<String> name = ParseIdentifier(CHECK_OK);
2533 Expect(Token::RPAREN, CHECK_OK);
2534
2535 if (peek() == Token::LBRACE) {
2536 // Allocate a temporary for holding the finally state while
2537 // executing the finally block.
2538 catch_var = top_scope_->NewTemporary(Factory::catch_var_symbol());
2539 Literal* name_literal = NEW(Literal(name));
2540 Expression* obj = NEW(CatchExtensionObject(name_literal, catch_var));
2541 { Target target(this, &catch_collector);
2542 catch_block = WithHelper(obj, NULL, true, CHECK_OK);
2543 }
2544 } else {
2545 Expect(Token::LBRACE, CHECK_OK);
2546 }
2547
2548 tok = peek();
2549 }
2550
2551 if (tok == Token::FINALLY || !has_catch) {
2552 Consume(Token::FINALLY);
2553 // Declare a variable for holding the finally state while
2554 // executing the finally block.
2555 finally_block = ParseBlock(NULL, CHECK_OK);
2556 }
2557
2558 // Simplify the AST nodes by converting:
2559 // 'try { } catch { } finally { }'
2560 // to:
2561 // 'try { try { } catch { } } finally { }'
2562
2563 if (!is_pre_parsing_ && catch_block != NULL && finally_block != NULL) {
2564 TryCatch* statement = NEW(TryCatch(try_block, catch_var, catch_block));
2565 statement->set_escaping_targets(collector.targets());
2566 try_block = NEW(Block(NULL, 1, false));
2567 try_block->AddStatement(statement);
2568 catch_block = NULL;
2569 }
2570
2571 TryStatement* result = NULL;
2572 if (!is_pre_parsing_) {
2573 if (catch_block != NULL) {
2574 ASSERT(finally_block == NULL);
2575 result = NEW(TryCatch(try_block, catch_var, catch_block));
2576 result->set_escaping_targets(collector.targets());
2577 } else {
2578 ASSERT(finally_block != NULL);
2579 result = NEW(TryFinally(try_block, finally_block));
2580 // Add the jump targets of the try block and the catch block.
2581 for (int i = 0; i < collector.targets()->length(); i++) {
2582 catch_collector.AddTarget(collector.targets()->at(i));
2583 }
2584 result->set_escaping_targets(catch_collector.targets());
2585 }
2586 }
2587
2588 return result;
2589 }
2590
2591
ParseDoStatement(ZoneStringList * labels,bool * ok)2592 LoopStatement* Parser::ParseDoStatement(ZoneStringList* labels, bool* ok) {
2593 // DoStatement ::
2594 // 'do' Statement 'while' '(' Expression ')' ';'
2595
2596 LoopStatement* loop = NEW(LoopStatement(labels, LoopStatement::DO_LOOP));
2597 Target target(this, loop);
2598
2599 Expect(Token::DO, CHECK_OK);
2600 Statement* body = ParseStatement(NULL, CHECK_OK);
2601 Expect(Token::WHILE, CHECK_OK);
2602 Expect(Token::LPAREN, CHECK_OK);
2603 Expression* cond = ParseExpression(true, CHECK_OK);
2604 Expect(Token::RPAREN, CHECK_OK);
2605
2606 // Allow do-statements to be terminated with and without
2607 // semi-colons. This allows code such as 'do;while(0)return' to
2608 // parse, which would not be the case if we had used the
2609 // ExpectSemicolon() functionality here.
2610 if (peek() == Token::SEMICOLON) Consume(Token::SEMICOLON);
2611
2612 if (loop) loop->Initialize(NULL, cond, NULL, body);
2613 return loop;
2614 }
2615
2616
ParseWhileStatement(ZoneStringList * labels,bool * ok)2617 LoopStatement* Parser::ParseWhileStatement(ZoneStringList* labels, bool* ok) {
2618 // WhileStatement ::
2619 // 'while' '(' Expression ')' Statement
2620
2621 LoopStatement* loop = NEW(LoopStatement(labels, LoopStatement::WHILE_LOOP));
2622 Target target(this, loop);
2623
2624 Expect(Token::WHILE, CHECK_OK);
2625 Expect(Token::LPAREN, CHECK_OK);
2626 Expression* cond = ParseExpression(true, CHECK_OK);
2627 Expect(Token::RPAREN, CHECK_OK);
2628 Statement* body = ParseStatement(NULL, CHECK_OK);
2629
2630 if (loop) loop->Initialize(NULL, cond, NULL, body);
2631 return loop;
2632 }
2633
2634
ParseForStatement(ZoneStringList * labels,bool * ok)2635 Statement* Parser::ParseForStatement(ZoneStringList* labels, bool* ok) {
2636 // ForStatement ::
2637 // 'for' '(' Expression? ';' Expression? ';' Expression? ')' Statement
2638
2639 Statement* init = NULL;
2640
2641 Expect(Token::FOR, CHECK_OK);
2642 Expect(Token::LPAREN, CHECK_OK);
2643 if (peek() != Token::SEMICOLON) {
2644 if (peek() == Token::VAR || peek() == Token::CONST) {
2645 Expression* each = NULL;
2646 Block* variable_statement =
2647 ParseVariableDeclarations(false, &each, CHECK_OK);
2648 if (peek() == Token::IN && each != NULL) {
2649 ForInStatement* loop = NEW(ForInStatement(labels));
2650 Target target(this, loop);
2651
2652 Expect(Token::IN, CHECK_OK);
2653 Expression* enumerable = ParseExpression(true, CHECK_OK);
2654 Expect(Token::RPAREN, CHECK_OK);
2655
2656 Statement* body = ParseStatement(NULL, CHECK_OK);
2657 if (is_pre_parsing_) {
2658 return NULL;
2659 } else {
2660 loop->Initialize(each, enumerable, body);
2661 Block* result = NEW(Block(NULL, 2, false));
2662 result->AddStatement(variable_statement);
2663 result->AddStatement(loop);
2664 // Parsed for-in loop w/ variable/const declaration.
2665 return result;
2666 }
2667
2668 } else {
2669 init = variable_statement;
2670 }
2671
2672 } else {
2673 Expression* expression = ParseExpression(false, CHECK_OK);
2674 if (peek() == Token::IN) {
2675 // Report syntax error if the expression is an invalid
2676 // left-hand side expression.
2677 if (expression == NULL || !expression->IsValidLeftHandSide()) {
2678 if (expression != NULL && expression->AsCall() != NULL) {
2679 // According to ECMA-262 host function calls are permitted to
2680 // return references. This cannot happen in our system so we
2681 // will always get an error. We could report this as a syntax
2682 // error here but for compatibility with KJS and SpiderMonkey we
2683 // choose to report the error at runtime.
2684 Handle<String> type = Factory::invalid_lhs_in_for_in_symbol();
2685 expression = NewThrowReferenceError(type);
2686 } else {
2687 // Invalid left hand side expressions that are not function
2688 // calls are reported as syntax errors at compile time.
2689 ReportMessage("invalid_lhs_in_for_in",
2690 Vector<const char*>::empty());
2691 *ok = false;
2692 return NULL;
2693 }
2694 }
2695 ForInStatement* loop = NEW(ForInStatement(labels));
2696 Target target(this, loop);
2697
2698 Expect(Token::IN, CHECK_OK);
2699 Expression* enumerable = ParseExpression(true, CHECK_OK);
2700 Expect(Token::RPAREN, CHECK_OK);
2701
2702 Statement* body = ParseStatement(NULL, CHECK_OK);
2703 if (loop) loop->Initialize(expression, enumerable, body);
2704
2705 // Parsed for-in loop.
2706 return loop;
2707
2708 } else {
2709 init = NEW(ExpressionStatement(expression));
2710 }
2711 }
2712 }
2713
2714 // Standard 'for' loop
2715 LoopStatement* loop = NEW(LoopStatement(labels, LoopStatement::FOR_LOOP));
2716 Target target(this, loop);
2717
2718 // Parsed initializer at this point.
2719 Expect(Token::SEMICOLON, CHECK_OK);
2720
2721 Expression* cond = NULL;
2722 if (peek() != Token::SEMICOLON) {
2723 cond = ParseExpression(true, CHECK_OK);
2724 }
2725 Expect(Token::SEMICOLON, CHECK_OK);
2726
2727 Statement* next = NULL;
2728 if (peek() != Token::RPAREN) {
2729 Expression* exp = ParseExpression(true, CHECK_OK);
2730 next = NEW(ExpressionStatement(exp));
2731 }
2732 Expect(Token::RPAREN, CHECK_OK);
2733
2734 Statement* body = ParseStatement(NULL, CHECK_OK);
2735
2736 if (loop) loop->Initialize(init, cond, next, body);
2737 return loop;
2738 }
2739
2740
2741 // Precedence = 1
ParseExpression(bool accept_IN,bool * ok)2742 Expression* Parser::ParseExpression(bool accept_IN, bool* ok) {
2743 // Expression ::
2744 // AssignmentExpression
2745 // Expression ',' AssignmentExpression
2746
2747 Expression* result = ParseAssignmentExpression(accept_IN, CHECK_OK);
2748 while (peek() == Token::COMMA) {
2749 Expect(Token::COMMA, CHECK_OK);
2750 Expression* right = ParseAssignmentExpression(accept_IN, CHECK_OK);
2751 result = NEW(BinaryOperation(Token::COMMA, result, right));
2752 }
2753 return result;
2754 }
2755
2756
2757 // Precedence = 2
ParseAssignmentExpression(bool accept_IN,bool * ok)2758 Expression* Parser::ParseAssignmentExpression(bool accept_IN, bool* ok) {
2759 // AssignmentExpression ::
2760 // ConditionalExpression
2761 // LeftHandSideExpression AssignmentOperator AssignmentExpression
2762
2763 Expression* expression = ParseConditionalExpression(accept_IN, CHECK_OK);
2764
2765 if (!Token::IsAssignmentOp(peek())) {
2766 // Parsed conditional expression only (no assignment).
2767 return expression;
2768 }
2769
2770 if (expression == NULL || !expression->IsValidLeftHandSide()) {
2771 if (expression != NULL && expression->AsCall() != NULL) {
2772 // According to ECMA-262 host function calls are permitted to
2773 // return references. This cannot happen in our system so we
2774 // will always get an error. We could report this as a syntax
2775 // error here but for compatibility with KJS and SpiderMonkey we
2776 // choose to report the error at runtime.
2777 Handle<String> type = Factory::invalid_lhs_in_assignment_symbol();
2778 expression = NewThrowReferenceError(type);
2779 } else {
2780 // Invalid left hand side expressions that are not function
2781 // calls are reported as syntax errors at compile time.
2782 //
2783 // NOTE: KJS sometimes delay the error reporting to runtime. If
2784 // we want to be completely compatible we should do the same.
2785 // For example: "(x++) = 42" gives a reference error at runtime
2786 // with KJS whereas we report a syntax error at compile time.
2787 ReportMessage("invalid_lhs_in_assignment", Vector<const char*>::empty());
2788 *ok = false;
2789 return NULL;
2790 }
2791 }
2792
2793
2794 Token::Value op = Next(); // Get assignment operator.
2795 int pos = scanner().location().beg_pos;
2796 Expression* right = ParseAssignmentExpression(accept_IN, CHECK_OK);
2797
2798 // TODO(1231235): We try to estimate the set of properties set by
2799 // constructors. We define a new property whenever there is an
2800 // assignment to a property of 'this'. We should probably only add
2801 // properties if we haven't seen them before. Otherwise we'll
2802 // probably overestimate the number of properties.
2803 Property* property = expression ? expression->AsProperty() : NULL;
2804 if (op == Token::ASSIGN &&
2805 property != NULL &&
2806 property->obj()->AsVariableProxy() != NULL &&
2807 property->obj()->AsVariableProxy()->is_this()) {
2808 temp_scope_->AddProperty();
2809 }
2810
2811 return NEW(Assignment(op, expression, right, pos));
2812 }
2813
2814
2815 // Precedence = 3
ParseConditionalExpression(bool accept_IN,bool * ok)2816 Expression* Parser::ParseConditionalExpression(bool accept_IN, bool* ok) {
2817 // ConditionalExpression ::
2818 // LogicalOrExpression
2819 // LogicalOrExpression '?' AssignmentExpression ':' AssignmentExpression
2820
2821 // We start using the binary expression parser for prec >= 4 only!
2822 Expression* expression = ParseBinaryExpression(4, accept_IN, CHECK_OK);
2823 if (peek() != Token::CONDITIONAL) return expression;
2824 Consume(Token::CONDITIONAL);
2825 // In parsing the first assignment expression in conditional
2826 // expressions we always accept the 'in' keyword; see ECMA-262,
2827 // section 11.12, page 58.
2828 Expression* left = ParseAssignmentExpression(true, CHECK_OK);
2829 Expect(Token::COLON, CHECK_OK);
2830 Expression* right = ParseAssignmentExpression(accept_IN, CHECK_OK);
2831 return NEW(Conditional(expression, left, right));
2832 }
2833
2834
Precedence(Token::Value tok,bool accept_IN)2835 static int Precedence(Token::Value tok, bool accept_IN) {
2836 if (tok == Token::IN && !accept_IN)
2837 return 0; // 0 precedence will terminate binary expression parsing
2838
2839 return Token::Precedence(tok);
2840 }
2841
2842
2843 // Precedence >= 4
ParseBinaryExpression(int prec,bool accept_IN,bool * ok)2844 Expression* Parser::ParseBinaryExpression(int prec, bool accept_IN, bool* ok) {
2845 ASSERT(prec >= 4);
2846 Expression* x = ParseUnaryExpression(CHECK_OK);
2847 for (int prec1 = Precedence(peek(), accept_IN); prec1 >= prec; prec1--) {
2848 // prec1 >= 4
2849 while (Precedence(peek(), accept_IN) == prec1) {
2850 Token::Value op = Next();
2851 Expression* y = ParseBinaryExpression(prec1 + 1, accept_IN, CHECK_OK);
2852
2853 // Compute some expressions involving only number literals.
2854 if (x && x->AsLiteral() && x->AsLiteral()->handle()->IsNumber() &&
2855 y && y->AsLiteral() && y->AsLiteral()->handle()->IsNumber()) {
2856 double x_val = x->AsLiteral()->handle()->Number();
2857 double y_val = y->AsLiteral()->handle()->Number();
2858
2859 switch (op) {
2860 case Token::ADD:
2861 x = NewNumberLiteral(x_val + y_val);
2862 continue;
2863 case Token::SUB:
2864 x = NewNumberLiteral(x_val - y_val);
2865 continue;
2866 case Token::MUL:
2867 x = NewNumberLiteral(x_val * y_val);
2868 continue;
2869 case Token::DIV:
2870 x = NewNumberLiteral(x_val / y_val);
2871 continue;
2872 case Token::BIT_OR:
2873 x = NewNumberLiteral(DoubleToInt32(x_val) | DoubleToInt32(y_val));
2874 continue;
2875 case Token::BIT_AND:
2876 x = NewNumberLiteral(DoubleToInt32(x_val) & DoubleToInt32(y_val));
2877 continue;
2878 case Token::BIT_XOR:
2879 x = NewNumberLiteral(DoubleToInt32(x_val) ^ DoubleToInt32(y_val));
2880 continue;
2881 case Token::SHL: {
2882 int value = DoubleToInt32(x_val) << (DoubleToInt32(y_val) & 0x1f);
2883 x = NewNumberLiteral(value);
2884 continue;
2885 }
2886 case Token::SHR: {
2887 uint32_t shift = DoubleToInt32(y_val) & 0x1f;
2888 uint32_t value = DoubleToUint32(x_val) >> shift;
2889 x = NewNumberLiteral(value);
2890 continue;
2891 }
2892 case Token::SAR: {
2893 uint32_t shift = DoubleToInt32(y_val) & 0x1f;
2894 int value = ArithmeticShiftRight(DoubleToInt32(x_val), shift);
2895 x = NewNumberLiteral(value);
2896 continue;
2897 }
2898 default:
2899 break;
2900 }
2901 }
2902
2903 // Convert constant divisions to multiplications for speed.
2904 if (op == Token::DIV &&
2905 y && y->AsLiteral() && y->AsLiteral()->handle()->IsNumber()) {
2906 double y_val = y->AsLiteral()->handle()->Number();
2907 int64_t y_int = static_cast<int64_t>(y_val);
2908 // There are rounding issues with this optimization, but they don't
2909 // apply if the number to be divided with has a reciprocal that can be
2910 // precisely represented as a floating point number. This is the case
2911 // if the number is an integer power of 2. Negative integer powers of
2912 // 2 work too, but for -2, -1, 1 and 2 we don't do the strength
2913 // reduction because the inlined optimistic idiv has a reasonable
2914 // chance of succeeding by producing a Smi answer with no remainder.
2915 if (static_cast<double>(y_int) == y_val &&
2916 (IsPowerOf2(y_int) || IsPowerOf2(-y_int)) &&
2917 (y_int > 2 || y_int < -2)) {
2918 y = NewNumberLiteral(1 / y_val);
2919 op = Token::MUL;
2920 }
2921 }
2922
2923 // For now we distinguish between comparisons and other binary
2924 // operations. (We could combine the two and get rid of this
2925 // code an AST node eventually.)
2926 if (Token::IsCompareOp(op)) {
2927 // We have a comparison.
2928 Token::Value cmp = op;
2929 switch (op) {
2930 case Token::NE: cmp = Token::EQ; break;
2931 case Token::NE_STRICT: cmp = Token::EQ_STRICT; break;
2932 default: break;
2933 }
2934 x = NEW(CompareOperation(cmp, x, y));
2935 if (cmp != op) {
2936 // The comparison was negated - add a NOT.
2937 x = NEW(UnaryOperation(Token::NOT, x));
2938 }
2939
2940 } else {
2941 // We have a "normal" binary operation.
2942 x = NEW(BinaryOperation(op, x, y));
2943 }
2944 }
2945 }
2946 return x;
2947 }
2948
2949
ParseUnaryExpression(bool * ok)2950 Expression* Parser::ParseUnaryExpression(bool* ok) {
2951 // UnaryExpression ::
2952 // PostfixExpression
2953 // 'delete' UnaryExpression
2954 // 'void' UnaryExpression
2955 // 'typeof' UnaryExpression
2956 // '++' UnaryExpression
2957 // '--' UnaryExpression
2958 // '+' UnaryExpression
2959 // '-' UnaryExpression
2960 // '~' UnaryExpression
2961 // '!' UnaryExpression
2962
2963 Token::Value op = peek();
2964 if (Token::IsUnaryOp(op)) {
2965 op = Next();
2966 Expression* x = ParseUnaryExpression(CHECK_OK);
2967
2968 // Compute some expressions involving only number literals.
2969 if (x && x->AsLiteral() && x->AsLiteral()->handle()->IsNumber()) {
2970 double x_val = x->AsLiteral()->handle()->Number();
2971 switch (op) {
2972 case Token::ADD:
2973 return x;
2974 case Token::SUB:
2975 return NewNumberLiteral(-x_val);
2976 case Token::BIT_NOT:
2977 return NewNumberLiteral(~DoubleToInt32(x_val));
2978 default: break;
2979 }
2980 }
2981
2982 return NEW(UnaryOperation(op, x));
2983
2984 } else if (Token::IsCountOp(op)) {
2985 op = Next();
2986 Expression* x = ParseUnaryExpression(CHECK_OK);
2987 if (x == NULL || !x->IsValidLeftHandSide()) {
2988 if (x != NULL && x->AsCall() != NULL) {
2989 // According to ECMA-262 host function calls are permitted to
2990 // return references. This cannot happen in our system so we
2991 // will always get an error. We could report this as a syntax
2992 // error here but for compatibility with KJS and SpiderMonkey we
2993 // choose to report the error at runtime.
2994 Handle<String> type = Factory::invalid_lhs_in_prefix_op_symbol();
2995 x = NewThrowReferenceError(type);
2996 } else {
2997 // Invalid left hand side expressions that are not function
2998 // calls are reported as syntax errors at compile time.
2999 ReportMessage("invalid_lhs_in_prefix_op", Vector<const char*>::empty());
3000 *ok = false;
3001 return NULL;
3002 }
3003 }
3004 return NEW(CountOperation(true /* prefix */, op, x));
3005
3006 } else {
3007 return ParsePostfixExpression(ok);
3008 }
3009 }
3010
3011
ParsePostfixExpression(bool * ok)3012 Expression* Parser::ParsePostfixExpression(bool* ok) {
3013 // PostfixExpression ::
3014 // LeftHandSideExpression ('++' | '--')?
3015
3016 Expression* result = ParseLeftHandSideExpression(CHECK_OK);
3017 if (!scanner_.has_line_terminator_before_next() && Token::IsCountOp(peek())) {
3018 if (result == NULL || !result->IsValidLeftHandSide()) {
3019 if (result != NULL && result->AsCall() != NULL) {
3020 // According to ECMA-262 host function calls are permitted to
3021 // return references. This cannot happen in our system so we
3022 // will always get an error. We could report this as a syntax
3023 // error here but for compatibility with KJS and SpiderMonkey we
3024 // choose to report the error at runtime.
3025 Handle<String> type = Factory::invalid_lhs_in_postfix_op_symbol();
3026 result = NewThrowReferenceError(type);
3027 } else {
3028 // Invalid left hand side expressions that are not function
3029 // calls are reported as syntax errors at compile time.
3030 ReportMessage("invalid_lhs_in_postfix_op",
3031 Vector<const char*>::empty());
3032 *ok = false;
3033 return NULL;
3034 }
3035 }
3036 Token::Value next = Next();
3037 result = NEW(CountOperation(false /* postfix */, next, result));
3038 }
3039 return result;
3040 }
3041
3042
ParseLeftHandSideExpression(bool * ok)3043 Expression* Parser::ParseLeftHandSideExpression(bool* ok) {
3044 // LeftHandSideExpression ::
3045 // (NewExpression | MemberExpression) ...
3046
3047 Expression* result;
3048 if (peek() == Token::NEW) {
3049 result = ParseNewExpression(CHECK_OK);
3050 } else {
3051 result = ParseMemberExpression(CHECK_OK);
3052 }
3053
3054 while (true) {
3055 switch (peek()) {
3056 case Token::LBRACK: {
3057 Consume(Token::LBRACK);
3058 int pos = scanner().location().beg_pos;
3059 Expression* index = ParseExpression(true, CHECK_OK);
3060 result = factory()->NewProperty(result, index, pos);
3061 Expect(Token::RBRACK, CHECK_OK);
3062 break;
3063 }
3064
3065 case Token::LPAREN: {
3066 int pos = scanner().location().beg_pos;
3067 ZoneList<Expression*>* args = ParseArguments(CHECK_OK);
3068
3069 // Keep track of eval() calls since they disable all local variable
3070 // optimizations.
3071 // The calls that need special treatment are the
3072 // direct (i.e. not aliased) eval calls. These calls are all of the
3073 // form eval(...) with no explicit receiver object where eval is not
3074 // declared in the current scope chain. These calls are marked as
3075 // potentially direct eval calls. Whether they are actually direct calls
3076 // to eval is determined at run time.
3077
3078 bool is_potentially_direct_eval = false;
3079 if (!is_pre_parsing_) {
3080 VariableProxy* callee = result->AsVariableProxy();
3081 if (callee != NULL && callee->IsVariable(Factory::eval_symbol())) {
3082 Handle<String> name = callee->name();
3083 Variable* var = top_scope_->Lookup(name);
3084 if (var == NULL) {
3085 top_scope_->RecordEvalCall();
3086 is_potentially_direct_eval = true;
3087 }
3088 }
3089 }
3090
3091 if (is_potentially_direct_eval) {
3092 result = factory()->NewCallEval(result, args, pos);
3093 } else {
3094 result = factory()->NewCall(result, args, pos);
3095 }
3096 break;
3097 }
3098
3099 case Token::PERIOD: {
3100 Consume(Token::PERIOD);
3101 int pos = scanner().location().beg_pos;
3102 Handle<String> name = ParseIdentifier(CHECK_OK);
3103 result = factory()->NewProperty(result, NEW(Literal(name)), pos);
3104 break;
3105 }
3106
3107 default:
3108 return result;
3109 }
3110 }
3111 }
3112
3113
3114
ParseNewPrefix(PositionStack * stack,bool * ok)3115 Expression* Parser::ParseNewPrefix(PositionStack* stack, bool* ok) {
3116 // NewExpression ::
3117 // ('new')+ MemberExpression
3118
3119 // The grammar for new expressions is pretty warped. The keyword
3120 // 'new' can either be a part of the new expression (where it isn't
3121 // followed by an argument list) or a part of the member expression,
3122 // where it must be followed by an argument list. To accommodate
3123 // this, we parse the 'new' keywords greedily and keep track of how
3124 // many we have parsed. This information is then passed on to the
3125 // member expression parser, which is only allowed to match argument
3126 // lists as long as it has 'new' prefixes left
3127 Expect(Token::NEW, CHECK_OK);
3128 PositionStack::Element pos(stack, scanner().location().beg_pos);
3129
3130 Expression* result;
3131 if (peek() == Token::NEW) {
3132 result = ParseNewPrefix(stack, CHECK_OK);
3133 } else {
3134 result = ParseMemberWithNewPrefixesExpression(stack, CHECK_OK);
3135 }
3136
3137 if (!stack->is_empty()) {
3138 int last = stack->pop();
3139 result = NEW(CallNew(result, new ZoneList<Expression*>(0), last));
3140 }
3141 return result;
3142 }
3143
3144
ParseNewExpression(bool * ok)3145 Expression* Parser::ParseNewExpression(bool* ok) {
3146 PositionStack stack(ok);
3147 return ParseNewPrefix(&stack, ok);
3148 }
3149
3150
ParseMemberExpression(bool * ok)3151 Expression* Parser::ParseMemberExpression(bool* ok) {
3152 return ParseMemberWithNewPrefixesExpression(NULL, ok);
3153 }
3154
3155
ParseMemberWithNewPrefixesExpression(PositionStack * stack,bool * ok)3156 Expression* Parser::ParseMemberWithNewPrefixesExpression(PositionStack* stack,
3157 bool* ok) {
3158 // MemberExpression ::
3159 // (PrimaryExpression | FunctionLiteral)
3160 // ('[' Expression ']' | '.' Identifier | Arguments)*
3161
3162 // Parse the initial primary or function expression.
3163 Expression* result = NULL;
3164 if (peek() == Token::FUNCTION) {
3165 Expect(Token::FUNCTION, CHECK_OK);
3166 int function_token_position = scanner().location().beg_pos;
3167 Handle<String> name;
3168 if (peek() == Token::IDENTIFIER) name = ParseIdentifier(CHECK_OK);
3169 result = ParseFunctionLiteral(name, function_token_position,
3170 NESTED, CHECK_OK);
3171 } else {
3172 result = ParsePrimaryExpression(CHECK_OK);
3173 }
3174
3175 while (true) {
3176 switch (peek()) {
3177 case Token::LBRACK: {
3178 Consume(Token::LBRACK);
3179 int pos = scanner().location().beg_pos;
3180 Expression* index = ParseExpression(true, CHECK_OK);
3181 result = factory()->NewProperty(result, index, pos);
3182 Expect(Token::RBRACK, CHECK_OK);
3183 break;
3184 }
3185 case Token::PERIOD: {
3186 Consume(Token::PERIOD);
3187 int pos = scanner().location().beg_pos;
3188 Handle<String> name = ParseIdentifier(CHECK_OK);
3189 result = factory()->NewProperty(result, NEW(Literal(name)), pos);
3190 break;
3191 }
3192 case Token::LPAREN: {
3193 if ((stack == NULL) || stack->is_empty()) return result;
3194 // Consume one of the new prefixes (already parsed).
3195 ZoneList<Expression*>* args = ParseArguments(CHECK_OK);
3196 int last = stack->pop();
3197 result = NEW(CallNew(result, args, last));
3198 break;
3199 }
3200 default:
3201 return result;
3202 }
3203 }
3204 }
3205
3206
ParseDebuggerStatement(bool * ok)3207 DebuggerStatement* Parser::ParseDebuggerStatement(bool* ok) {
3208 // In ECMA-262 'debugger' is defined as a reserved keyword. In some browser
3209 // contexts this is used as a statement which invokes the debugger as i a
3210 // break point is present.
3211 // DebuggerStatement ::
3212 // 'debugger' ';'
3213
3214 Expect(Token::DEBUGGER, CHECK_OK);
3215 ExpectSemicolon(CHECK_OK);
3216 return NEW(DebuggerStatement());
3217 }
3218
3219
ReportUnexpectedToken(Token::Value token)3220 void Parser::ReportUnexpectedToken(Token::Value token) {
3221 // We don't report stack overflows here, to avoid increasing the
3222 // stack depth even further. Instead we report it after parsing is
3223 // over, in ParseProgram.
3224 if (token == Token::ILLEGAL && scanner().stack_overflow())
3225 return;
3226 // Four of the tokens are treated specially
3227 switch (token) {
3228 case Token::EOS:
3229 return ReportMessage("unexpected_eos", Vector<const char*>::empty());
3230 case Token::NUMBER:
3231 return ReportMessage("unexpected_token_number",
3232 Vector<const char*>::empty());
3233 case Token::STRING:
3234 return ReportMessage("unexpected_token_string",
3235 Vector<const char*>::empty());
3236 case Token::IDENTIFIER:
3237 return ReportMessage("unexpected_token_identifier",
3238 Vector<const char*>::empty());
3239 default:
3240 const char* name = Token::String(token);
3241 ASSERT(name != NULL);
3242 ReportMessage("unexpected_token", Vector<const char*>(&name, 1));
3243 }
3244 }
3245
3246
ParsePrimaryExpression(bool * ok)3247 Expression* Parser::ParsePrimaryExpression(bool* ok) {
3248 // PrimaryExpression ::
3249 // 'this'
3250 // 'null'
3251 // 'true'
3252 // 'false'
3253 // Identifier
3254 // Number
3255 // String
3256 // ArrayLiteral
3257 // ObjectLiteral
3258 // RegExpLiteral
3259 // '(' Expression ')'
3260
3261 Expression* result = NULL;
3262 switch (peek()) {
3263 case Token::THIS: {
3264 Consume(Token::THIS);
3265 if (is_pre_parsing_) {
3266 result = VariableProxySentinel::this_proxy();
3267 } else {
3268 VariableProxy* recv = top_scope_->receiver();
3269 recv->var_uses()->RecordRead(1);
3270 result = recv;
3271 }
3272 break;
3273 }
3274
3275 case Token::NULL_LITERAL:
3276 Consume(Token::NULL_LITERAL);
3277 result = NEW(Literal(Factory::null_value()));
3278 break;
3279
3280 case Token::TRUE_LITERAL:
3281 Consume(Token::TRUE_LITERAL);
3282 result = NEW(Literal(Factory::true_value()));
3283 break;
3284
3285 case Token::FALSE_LITERAL:
3286 Consume(Token::FALSE_LITERAL);
3287 result = NEW(Literal(Factory::false_value()));
3288 break;
3289
3290 case Token::IDENTIFIER: {
3291 Handle<String> name = ParseIdentifier(CHECK_OK);
3292 if (is_pre_parsing_) {
3293 result = VariableProxySentinel::identifier_proxy();
3294 } else {
3295 result = top_scope_->NewUnresolved(name, inside_with());
3296 }
3297 break;
3298 }
3299
3300 case Token::NUMBER: {
3301 Consume(Token::NUMBER);
3302 double value =
3303 StringToDouble(scanner_.literal_string(), ALLOW_HEX | ALLOW_OCTALS);
3304 result = NewNumberLiteral(value);
3305 break;
3306 }
3307
3308 case Token::STRING: {
3309 Consume(Token::STRING);
3310 Handle<String> symbol =
3311 factory()->LookupSymbol(scanner_.literal_string(),
3312 scanner_.literal_length());
3313 result = NEW(Literal(symbol));
3314 break;
3315 }
3316
3317 case Token::ASSIGN_DIV:
3318 result = ParseRegExpLiteral(true, CHECK_OK);
3319 break;
3320
3321 case Token::DIV:
3322 result = ParseRegExpLiteral(false, CHECK_OK);
3323 break;
3324
3325 case Token::LBRACK:
3326 result = ParseArrayLiteral(CHECK_OK);
3327 break;
3328
3329 case Token::LBRACE:
3330 result = ParseObjectLiteral(CHECK_OK);
3331 break;
3332
3333 case Token::LPAREN:
3334 Consume(Token::LPAREN);
3335 result = ParseExpression(true, CHECK_OK);
3336 Expect(Token::RPAREN, CHECK_OK);
3337 break;
3338
3339 case Token::MOD:
3340 if (allow_natives_syntax_ || extension_ != NULL) {
3341 result = ParseV8Intrinsic(CHECK_OK);
3342 break;
3343 }
3344 // If we're not allowing special syntax we fall-through to the
3345 // default case.
3346
3347 default: {
3348 Token::Value tok = peek();
3349 // Token::Peek returns the value of the next token but
3350 // location() gives info about the current token.
3351 // Therefore, we need to read ahead to the next token
3352 Next();
3353 ReportUnexpectedToken(tok);
3354 *ok = false;
3355 return NULL;
3356 }
3357 }
3358
3359 return result;
3360 }
3361
3362
ParseArrayLiteral(bool * ok)3363 Expression* Parser::ParseArrayLiteral(bool* ok) {
3364 // ArrayLiteral ::
3365 // '[' Expression? (',' Expression?)* ']'
3366
3367 ZoneListWrapper<Expression> values = factory()->NewList<Expression>(4);
3368 Expect(Token::LBRACK, CHECK_OK);
3369 while (peek() != Token::RBRACK) {
3370 Expression* elem;
3371 if (peek() == Token::COMMA) {
3372 elem = GetLiteralTheHole();
3373 } else {
3374 elem = ParseAssignmentExpression(true, CHECK_OK);
3375 }
3376 values.Add(elem);
3377 if (peek() != Token::RBRACK) {
3378 Expect(Token::COMMA, CHECK_OK);
3379 }
3380 }
3381 Expect(Token::RBRACK, CHECK_OK);
3382
3383 // Update the scope information before the pre-parsing bailout.
3384 temp_scope_->set_contains_array_literal();
3385 int literal_index = temp_scope_->NextMaterializedLiteralIndex();
3386
3387 if (is_pre_parsing_) return NULL;
3388
3389 // Allocate a fixed array with all the literals.
3390 Handle<FixedArray> literals =
3391 Factory::NewFixedArray(values.length(), TENURED);
3392
3393 // Fill in the literals.
3394 bool is_simple = true;
3395 int depth = 1;
3396 for (int i = 0; i < values.length(); i++) {
3397 MaterializedLiteral* m_literal = values.at(i)->AsMaterializedLiteral();
3398 if (m_literal != NULL && m_literal->depth() + 1 > depth) {
3399 depth = m_literal->depth() + 1;
3400 }
3401 Handle<Object> boilerplate_value = GetBoilerplateValue(values.at(i));
3402 if (boilerplate_value->IsUndefined()) {
3403 literals->set_the_hole(i);
3404 is_simple = false;
3405 } else {
3406 literals->set(i, *boilerplate_value);
3407 }
3408 }
3409
3410 return NEW(ArrayLiteral(literals, values.elements(),
3411 literal_index, is_simple, depth));
3412 }
3413
3414
IsBoilerplateProperty(ObjectLiteral::Property * property)3415 bool Parser::IsBoilerplateProperty(ObjectLiteral::Property* property) {
3416 return property != NULL &&
3417 property->kind() != ObjectLiteral::Property::PROTOTYPE;
3418 }
3419
3420
IsCompileTimeValue(Expression * expression)3421 bool CompileTimeValue::IsCompileTimeValue(Expression* expression) {
3422 MaterializedLiteral* lit = expression->AsMaterializedLiteral();
3423 return lit != NULL && lit->is_simple();
3424 }
3425
GetValue(Expression * expression)3426 Handle<FixedArray> CompileTimeValue::GetValue(Expression* expression) {
3427 ASSERT(IsCompileTimeValue(expression));
3428 Handle<FixedArray> result = Factory::NewFixedArray(2, TENURED);
3429 ObjectLiteral* object_literal = expression->AsObjectLiteral();
3430 if (object_literal != NULL) {
3431 ASSERT(object_literal->is_simple());
3432 result->set(kTypeSlot, Smi::FromInt(OBJECT_LITERAL));
3433 result->set(kElementsSlot, *object_literal->constant_properties());
3434 } else {
3435 ArrayLiteral* array_literal = expression->AsArrayLiteral();
3436 ASSERT(array_literal != NULL && array_literal->is_simple());
3437 result->set(kTypeSlot, Smi::FromInt(ARRAY_LITERAL));
3438 result->set(kElementsSlot, *array_literal->literals());
3439 }
3440 return result;
3441 }
3442
3443
GetType(Handle<FixedArray> value)3444 CompileTimeValue::Type CompileTimeValue::GetType(Handle<FixedArray> value) {
3445 Smi* type_value = Smi::cast(value->get(kTypeSlot));
3446 return static_cast<Type>(type_value->value());
3447 }
3448
3449
GetElements(Handle<FixedArray> value)3450 Handle<FixedArray> CompileTimeValue::GetElements(Handle<FixedArray> value) {
3451 return Handle<FixedArray>(FixedArray::cast(value->get(kElementsSlot)));
3452 }
3453
3454
GetBoilerplateValue(Expression * expression)3455 Handle<Object> Parser::GetBoilerplateValue(Expression* expression) {
3456 if (expression->AsLiteral() != NULL) {
3457 return expression->AsLiteral()->handle();
3458 }
3459 if (CompileTimeValue::IsCompileTimeValue(expression)) {
3460 return CompileTimeValue::GetValue(expression);
3461 }
3462 return Factory::undefined_value();
3463 }
3464
3465
ParseObjectLiteral(bool * ok)3466 Expression* Parser::ParseObjectLiteral(bool* ok) {
3467 // ObjectLiteral ::
3468 // '{' (
3469 // ((Identifier | String | Number) ':' AssignmentExpression)
3470 // | (('get' | 'set') FunctionLiteral)
3471 // )*[','] '}'
3472
3473 ZoneListWrapper<ObjectLiteral::Property> properties =
3474 factory()->NewList<ObjectLiteral::Property>(4);
3475 int number_of_boilerplate_properties = 0;
3476
3477 Expect(Token::LBRACE, CHECK_OK);
3478 while (peek() != Token::RBRACE) {
3479 Literal* key = NULL;
3480 switch (peek()) {
3481 case Token::IDENTIFIER: {
3482 // Store identifier keys as literal symbols to avoid
3483 // resolving them when compiling code for the object
3484 // literal.
3485 bool is_getter = false;
3486 bool is_setter = false;
3487 Handle<String> id =
3488 ParseIdentifierOrGetOrSet(&is_getter, &is_setter, CHECK_OK);
3489 if (is_getter || is_setter) {
3490 // Special handling of getter and setter syntax.
3491 if (peek() == Token::IDENTIFIER) {
3492 Handle<String> name = ParseIdentifier(CHECK_OK);
3493 FunctionLiteral* value =
3494 ParseFunctionLiteral(name, RelocInfo::kNoPosition,
3495 DECLARATION, CHECK_OK);
3496 ObjectLiteral::Property* property =
3497 NEW(ObjectLiteral::Property(is_getter, value));
3498 if (IsBoilerplateProperty(property))
3499 number_of_boilerplate_properties++;
3500 properties.Add(property);
3501 if (peek() != Token::RBRACE) Expect(Token::COMMA, CHECK_OK);
3502 continue; // restart the while
3503 }
3504 }
3505 key = NEW(Literal(id));
3506 break;
3507 }
3508
3509 case Token::STRING: {
3510 Consume(Token::STRING);
3511 Handle<String> string =
3512 factory()->LookupSymbol(scanner_.literal_string(),
3513 scanner_.literal_length());
3514 uint32_t index;
3515 if (!string.is_null() && string->AsArrayIndex(&index)) {
3516 key = NewNumberLiteral(index);
3517 } else {
3518 key = NEW(Literal(string));
3519 }
3520 break;
3521 }
3522
3523 case Token::NUMBER: {
3524 Consume(Token::NUMBER);
3525 double value =
3526 StringToDouble(scanner_.literal_string(), ALLOW_HEX | ALLOW_OCTALS);
3527 key = NewNumberLiteral(value);
3528 break;
3529 }
3530
3531 default:
3532 Expect(Token::RBRACE, CHECK_OK);
3533 break;
3534 }
3535
3536 Expect(Token::COLON, CHECK_OK);
3537 Expression* value = ParseAssignmentExpression(true, CHECK_OK);
3538
3539 ObjectLiteral::Property* property =
3540 NEW(ObjectLiteral::Property(key, value));
3541
3542 // Count CONSTANT or COMPUTED properties to maintain the enumeration order.
3543 if (IsBoilerplateProperty(property)) number_of_boilerplate_properties++;
3544 properties.Add(property);
3545
3546 // TODO(1240767): Consider allowing trailing comma.
3547 if (peek() != Token::RBRACE) Expect(Token::COMMA, CHECK_OK);
3548 }
3549 Expect(Token::RBRACE, CHECK_OK);
3550 // Computation of literal_index must happen before pre parse bailout.
3551 int literal_index = temp_scope_->NextMaterializedLiteralIndex();
3552 if (is_pre_parsing_) return NULL;
3553
3554 Handle<FixedArray> constant_properties =
3555 Factory::NewFixedArray(number_of_boilerplate_properties * 2, TENURED);
3556 int position = 0;
3557 bool is_simple = true;
3558 int depth = 1;
3559 for (int i = 0; i < properties.length(); i++) {
3560 ObjectLiteral::Property* property = properties.at(i);
3561 if (!IsBoilerplateProperty(property)) {
3562 is_simple = false;
3563 continue;
3564 }
3565 MaterializedLiteral* m_literal = property->value()->AsMaterializedLiteral();
3566 if (m_literal != NULL && m_literal->depth() + 1 > depth) {
3567 depth = m_literal->depth() + 1;
3568 }
3569
3570 // Add CONSTANT and COMPUTED properties to boilerplate. Use undefined
3571 // value for COMPUTED properties, the real value is filled in at
3572 // runtime. The enumeration order is maintained.
3573 Handle<Object> key = property->key()->handle();
3574 Handle<Object> value = GetBoilerplateValue(property->value());
3575 is_simple = is_simple && !value->IsUndefined();
3576
3577 // Add name, value pair to the fixed array.
3578 constant_properties->set(position++, *key);
3579 constant_properties->set(position++, *value);
3580 }
3581
3582 return new ObjectLiteral(constant_properties,
3583 properties.elements(),
3584 literal_index,
3585 is_simple,
3586 depth);
3587 }
3588
3589
ParseRegExpLiteral(bool seen_equal,bool * ok)3590 Expression* Parser::ParseRegExpLiteral(bool seen_equal, bool* ok) {
3591 if (!scanner_.ScanRegExpPattern(seen_equal)) {
3592 Next();
3593 ReportMessage("unterminated_regexp", Vector<const char*>::empty());
3594 *ok = false;
3595 return NULL;
3596 }
3597
3598 int literal_index = temp_scope_->NextMaterializedLiteralIndex();
3599
3600 if (is_pre_parsing_) {
3601 // If we're preparsing we just do all the parsing stuff without
3602 // building anything.
3603 if (!scanner_.ScanRegExpFlags()) {
3604 Next();
3605 ReportMessage("invalid_regexp_flags", Vector<const char*>::empty());
3606 *ok = false;
3607 return NULL;
3608 }
3609 Next();
3610 return NULL;
3611 }
3612
3613 Handle<String> js_pattern =
3614 Factory::NewStringFromUtf8(scanner_.next_literal(), TENURED);
3615 scanner_.ScanRegExpFlags();
3616 Handle<String> js_flags =
3617 Factory::NewStringFromUtf8(scanner_.next_literal(), TENURED);
3618 Next();
3619
3620 return new RegExpLiteral(js_pattern, js_flags, literal_index);
3621 }
3622
3623
ParseArguments(bool * ok)3624 ZoneList<Expression*>* Parser::ParseArguments(bool* ok) {
3625 // Arguments ::
3626 // '(' (AssignmentExpression)*[','] ')'
3627
3628 ZoneListWrapper<Expression> result = factory()->NewList<Expression>(4);
3629 Expect(Token::LPAREN, CHECK_OK);
3630 bool done = (peek() == Token::RPAREN);
3631 while (!done) {
3632 Expression* argument = ParseAssignmentExpression(true, CHECK_OK);
3633 result.Add(argument);
3634 done = (peek() == Token::RPAREN);
3635 if (!done) Expect(Token::COMMA, CHECK_OK);
3636 }
3637 Expect(Token::RPAREN, CHECK_OK);
3638 return result.elements();
3639 }
3640
3641
// Parses the formal parameter list and body of a function, starting at the
// opening '(' of the parameter list, and returns the resulting
// FunctionLiteral node.
//
//   var_name                 name associated with the function; may be a
//                            null handle, in which case the empty symbol is
//                            used.
//   function_token_position  recorded on the resulting literal (full parse
//                            only).
//   type                     only EXPRESSION and NESTED functions get
//                            |var_name| as their own function name.
//   ok                       set to false on a parse error; the CHECK_OK
//                            uses below then return NULL.
//
// When the function is to be compiled lazily and preparse data is
// available, the body is not parsed: the scanner seeks forward to the
// recorded end position and the counts recorded in the preparse data are
// used instead.
FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
                                              int function_token_position,
                                              FunctionLiteralType type,
                                              bool* ok) {
  // Function ::
  //   '(' FormalParameterList? ')' '{' FunctionBody '}'

  bool is_named = !var_name.is_null();

  // The name associated with this function. If it's a function expression,
  // this is the actual function name, otherwise this is the name of the
  // variable declared and initialized with the function (expression). In
  // that case, we don't have a function name (it's empty).
  Handle<String> name = is_named ? var_name : factory()->EmptySymbol();
  // The function name, if any.
  Handle<String> function_name = factory()->EmptySymbol();
  if (is_named && (type == EXPRESSION || type == NESTED)) {
    function_name = name;
  }

  int num_parameters = 0;
  // Parse function body.
  // NOTE: the local |type| declared here shadows the |type| parameter for
  // the remainder of this block.
  { Scope::Type type = Scope::FUNCTION_SCOPE;
    Scope* scope = factory()->NewScope(top_scope_, type, inside_with());
    LexicalScope lexical_scope(this, scope);
    TemporaryScope temp_scope(this);
    top_scope_->SetScopeName(name);

    //  FormalParameterList ::
    //    '(' (Identifier)*[','] ')'
    Expect(Token::LPAREN, CHECK_OK);
    // The scanner position just after consuming '(' identifies this
    // function: it is the key used for the preparse-data lookup and for
    // logging below.
    int start_pos = scanner_.location().beg_pos;
    bool done = (peek() == Token::RPAREN);
    while (!done) {
      Handle<String> param_name = ParseIdentifier(CHECK_OK);
      // Parameters are only declared (and counted) during a full parse.
      if (!is_pre_parsing_) {
        top_scope_->AddParameter(top_scope_->DeclareLocal(param_name,
                                                          Variable::VAR));
        num_parameters++;
      }
      done = (peek() == Token::RPAREN);
      if (!done) Expect(Token::COMMA, CHECK_OK);
    }
    Expect(Token::RPAREN, CHECK_OK);

    Expect(Token::LBRACE, CHECK_OK);
    ZoneListWrapper<Statement> body = factory()->NewList<Statement>(8);

    // If we have a named function expression, we add a local variable
    // declaration to the body of the function with the name of the
    // function and let it refer to the function itself (closure).
    // NOTE: We create a proxy and resolve it here so that in the
    // future we can change the AST to only refer to VariableProxies
    // instead of Variables and Proxies as is the case now.
    if (!function_name.is_null() && function_name->length() > 0) {
      Variable* fvar = top_scope_->DeclareFunctionVar(function_name);
      VariableProxy* fproxy =
          top_scope_->NewUnresolved(function_name, inside_with());
      fproxy->BindTo(fvar);
      body.Add(new ExpressionStatement(
                   new Assignment(Token::INIT_VAR, fproxy,
                                  NEW(ThisFunction()),
                                  RelocInfo::kNoPosition)));
    }

    // Determine if the function will be lazily compiled. The mode can
    // only be PARSE_LAZILY if the --lazy flag is true.
    bool is_lazily_compiled =
        mode() == PARSE_LAZILY && top_scope_->HasTrivialOuterContext();

    // Per-function statistics; filled in either from the preparse data
    // (body skipped) or from |temp_scope| (body parsed).
    int materialized_literal_count;
    int expected_property_count;
    bool contains_array_literal;
    bool only_this_property_assignments;
    bool only_simple_this_property_assignments;
    Handle<FixedArray> this_property_assignments;
    if (is_lazily_compiled && pre_data() != NULL) {
      // Skip the body: jump the scanner to the recorded end position and
      // reuse the recorded counts. No this-property-assignment information
      // is available in this case.
      FunctionEntry entry = pre_data()->GetFunctionEnd(start_pos);
      int end_pos = entry.end_pos();
      Counters::total_preparse_skipped.Increment(end_pos - start_pos);
      scanner_.SeekForward(end_pos);
      materialized_literal_count = entry.literal_count();
      expected_property_count = entry.property_count();
      only_this_property_assignments = false;
      only_simple_this_property_assignments = false;
      this_property_assignments = Factory::empty_fixed_array();
      contains_array_literal = entry.contains_array_literal();
    } else {
      ParseSourceElements(&body, Token::RBRACE, CHECK_OK);
      materialized_literal_count = temp_scope.materialized_literal_count();
      expected_property_count = temp_scope.expected_property_count();
      contains_array_literal = temp_scope.contains_array_literal();
      only_this_property_assignments =
          temp_scope.only_this_property_assignments();
      only_simple_this_property_assignments =
          temp_scope.only_simple_this_property_assignments();
      this_property_assignments = temp_scope.this_property_assignments();
    }

    Expect(Token::RBRACE, CHECK_OK);
    int end_pos = scanner_.location().end_pos;

    // Record the function's extent and counts in the log; the entry is
    // only filled in when the log hands back a valid one.
    FunctionEntry entry = log()->LogFunction(start_pos);
    if (entry.is_valid()) {
      entry.set_end_pos(end_pos);
      entry.set_literal_count(materialized_literal_count);
      entry.set_property_count(expected_property_count);
      entry.set_contains_array_literal(contains_array_literal);
    }

    FunctionLiteral* function_literal =
        NEW(FunctionLiteral(name,
                            top_scope_,
                            body.elements(),
                            materialized_literal_count,
                            contains_array_literal,
                            expected_property_count,
                            only_this_property_assignments,
                            only_simple_this_property_assignments,
                            this_property_assignments,
                            num_parameters,
                            start_pos,
                            end_pos,
                            function_name->length() > 0));
    if (!is_pre_parsing_) {
      function_literal->set_function_token_position(function_token_position);
    }
    return function_literal;
  }
}
3772
3773
ParseV8Intrinsic(bool * ok)3774 Expression* Parser::ParseV8Intrinsic(bool* ok) {
3775 // CallRuntime ::
3776 // '%' Identifier Arguments
3777
3778 Expect(Token::MOD, CHECK_OK);
3779 Handle<String> name = ParseIdentifier(CHECK_OK);
3780 Runtime::Function* function =
3781 Runtime::FunctionForName(scanner_.literal_string());
3782 ZoneList<Expression*>* args = ParseArguments(CHECK_OK);
3783 if (function == NULL && extension_ != NULL) {
3784 // The extension structures are only accessible while parsing the
3785 // very first time not when reparsing because of lazy compilation.
3786 top_scope_->ForceEagerCompilation();
3787 }
3788
3789 // Check for built-in macros.
3790 if (!is_pre_parsing_) {
3791 if (function == Runtime::FunctionForId(Runtime::kIS_VAR)) {
3792 // %IS_VAR(x)
3793 // evaluates to x if x is a variable,
3794 // leads to a parse error otherwise
3795 if (args->length() == 1 && args->at(0)->AsVariableProxy() != NULL) {
3796 return args->at(0);
3797 }
3798 *ok = false;
3799 // Check here for other macros.
3800 // } else if (function == Runtime::FunctionForId(Runtime::kIS_VAR)) {
3801 // ...
3802 }
3803
3804 if (!*ok) {
3805 // We found a macro but it failed.
3806 ReportMessage("unable_to_parse", Vector<const char*>::empty());
3807 return NULL;
3808 }
3809 }
3810
3811 // Otherwise we have a runtime call.
3812 return NEW(CallRuntime(name, function, args));
3813 }
3814
3815
// Advances past the next token, which the caller expects to be |token|.
// Unlike Expect(), a mismatch is not reported as a parse error; it is only
// checked by the ASSERT below.
void Parser::Consume(Token::Value token) {
  Token::Value next = Next();
  // Keep both values referenced even when ASSERT expands to nothing
  // (presumably in non-debug builds; USE and ASSERT are defined elsewhere).
  USE(next);
  USE(token);
  ASSERT(next == token);
}
3822
3823
Expect(Token::Value token,bool * ok)3824 void Parser::Expect(Token::Value token, bool* ok) {
3825 Token::Value next = Next();
3826 if (next == token) return;
3827 ReportUnexpectedToken(next);
3828 *ok = false;
3829 }
3830
3831
ExpectSemicolon(bool * ok)3832 void Parser::ExpectSemicolon(bool* ok) {
3833 // Check for automatic semicolon insertion according to
3834 // the rules given in ECMA-262, section 7.9, page 21.
3835 Token::Value tok = peek();
3836 if (tok == Token::SEMICOLON) {
3837 Next();
3838 return;
3839 }
3840 if (scanner_.has_line_terminator_before_next() ||
3841 tok == Token::RBRACE ||
3842 tok == Token::EOS) {
3843 return;
3844 }
3845 Expect(Token::SEMICOLON, ok);
3846 }
3847
3848
// Returns a new Literal node wrapping the undefined value.
// NOTE(review): NEW is a macro defined outside this chunk; it presumably
// yields NULL while preparsing -- confirm against its definition.
Literal* Parser::GetLiteralUndefined() {
  return NEW(Literal(Factory::undefined_value()));
}
3852
3853
// Returns a new Literal node wrapping the hole value.
// NOTE(review): NEW is a macro defined outside this chunk; it presumably
// yields NULL while preparsing -- confirm against its definition.
Literal* Parser::GetLiteralTheHole() {
  return NEW(Literal(Factory::the_hole_value()));
}
3857
3858
// Returns a number literal node for |value|; thin wrapper around
// NewNumberLiteral().
Literal* Parser::GetLiteralNumber(double value) {
  return NewNumberLiteral(value);
}
3862
3863
ParseIdentifier(bool * ok)3864 Handle<String> Parser::ParseIdentifier(bool* ok) {
3865 Expect(Token::IDENTIFIER, ok);
3866 if (!*ok) return Handle<String>();
3867 return factory()->LookupSymbol(scanner_.literal_string(),
3868 scanner_.literal_length());
3869 }
3870
3871 // This function reads an identifier and determines whether or not it
3872 // is 'get' or 'set'. The reason for not using ParseIdentifier and
3873 // checking on the output is that this involves heap allocation which
3874 // we can't do during preparsing.
ParseIdentifierOrGetOrSet(bool * is_get,bool * is_set,bool * ok)3875 Handle<String> Parser::ParseIdentifierOrGetOrSet(bool* is_get,
3876 bool* is_set,
3877 bool* ok) {
3878 Expect(Token::IDENTIFIER, ok);
3879 if (!*ok) return Handle<String>();
3880 if (scanner_.literal_length() == 3) {
3881 const char* token = scanner_.literal_string();
3882 *is_get = strcmp(token, "get") == 0;
3883 *is_set = !*is_get && strcmp(token, "set") == 0;
3884 }
3885 return factory()->LookupSymbol(scanner_.literal_string(),
3886 scanner_.literal_length());
3887 }
3888
3889
3890 // ----------------------------------------------------------------------------
3891 // Parser support
3892
3893
TargetStackContainsLabel(Handle<String> label)3894 bool Parser::TargetStackContainsLabel(Handle<String> label) {
3895 for (Target* t = target_stack_; t != NULL; t = t->previous()) {
3896 BreakableStatement* stat = t->node()->AsBreakableStatement();
3897 if (stat != NULL && ContainsLabel(stat->labels(), label))
3898 return true;
3899 }
3900 return false;
3901 }
3902
3903
LookupBreakTarget(Handle<String> label,bool * ok)3904 BreakableStatement* Parser::LookupBreakTarget(Handle<String> label, bool* ok) {
3905 bool anonymous = label.is_null();
3906 for (Target* t = target_stack_; t != NULL; t = t->previous()) {
3907 BreakableStatement* stat = t->node()->AsBreakableStatement();
3908 if (stat == NULL) continue;
3909 if ((anonymous && stat->is_target_for_anonymous()) ||
3910 (!anonymous && ContainsLabel(stat->labels(), label))) {
3911 RegisterTargetUse(stat->break_target(), t->previous());
3912 return stat;
3913 }
3914 }
3915 return NULL;
3916 }
3917
3918
LookupContinueTarget(Handle<String> label,bool * ok)3919 IterationStatement* Parser::LookupContinueTarget(Handle<String> label,
3920 bool* ok) {
3921 bool anonymous = label.is_null();
3922 for (Target* t = target_stack_; t != NULL; t = t->previous()) {
3923 IterationStatement* stat = t->node()->AsIterationStatement();
3924 if (stat == NULL) continue;
3925
3926 ASSERT(stat->is_target_for_anonymous());
3927 if (anonymous || ContainsLabel(stat->labels(), label)) {
3928 RegisterTargetUse(stat->continue_target(), t->previous());
3929 return stat;
3930 }
3931 }
3932 return NULL;
3933 }
3934
3935
RegisterTargetUse(BreakTarget * target,Target * stop)3936 void Parser::RegisterTargetUse(BreakTarget* target, Target* stop) {
3937 // Register that a break target found at the given stop in the
3938 // target stack has been used from the top of the target stack. Add
3939 // the break target to any TargetCollectors passed on the stack.
3940 for (Target* t = target_stack_; t != stop; t = t->previous()) {
3941 TargetCollector* collector = t->node()->AsTargetCollector();
3942 if (collector != NULL) collector->AddTarget(target);
3943 }
3944 }
3945
3946
// Returns a new Literal node holding |number| as a TENURED heap number.
// NOTE(review): NEW is a macro defined outside this chunk; it presumably
// yields NULL (without allocating) while preparsing -- confirm against its
// definition.
Literal* Parser::NewNumberLiteral(double number) {
  return NEW(Literal(Factory::NewNumber(number, TENURED)));
}
3950
3951
NewThrowReferenceError(Handle<String> type)3952 Expression* Parser::NewThrowReferenceError(Handle<String> type) {
3953 return NewThrowError(Factory::MakeReferenceError_symbol(),
3954 type, HandleVector<Object>(NULL, 0));
3955 }
3956
3957
NewThrowSyntaxError(Handle<String> type,Handle<Object> first)3958 Expression* Parser::NewThrowSyntaxError(Handle<String> type,
3959 Handle<Object> first) {
3960 int argc = first.is_null() ? 0 : 1;
3961 Vector< Handle<Object> > arguments = HandleVector<Object>(&first, argc);
3962 return NewThrowError(Factory::MakeSyntaxError_symbol(), type, arguments);
3963 }
3964
3965
NewThrowTypeError(Handle<String> type,Handle<Object> first,Handle<Object> second)3966 Expression* Parser::NewThrowTypeError(Handle<String> type,
3967 Handle<Object> first,
3968 Handle<Object> second) {
3969 ASSERT(!first.is_null() && !second.is_null());
3970 Handle<Object> elements[] = { first, second };
3971 Vector< Handle<Object> > arguments =
3972 HandleVector<Object>(elements, ARRAY_SIZE(elements));
3973 return NewThrowError(Factory::MakeTypeError_symbol(), type, arguments);
3974 }
3975
3976
NewThrowError(Handle<String> constructor,Handle<String> type,Vector<Handle<Object>> arguments)3977 Expression* Parser::NewThrowError(Handle<String> constructor,
3978 Handle<String> type,
3979 Vector< Handle<Object> > arguments) {
3980 if (is_pre_parsing_) return NULL;
3981
3982 int argc = arguments.length();
3983 Handle<JSArray> array = Factory::NewJSArray(argc, TENURED);
3984 ASSERT(array->IsJSArray() && array->HasFastElements());
3985 for (int i = 0; i < argc; i++) {
3986 Handle<Object> element = arguments[i];
3987 if (!element.is_null()) {
3988 array->SetFastElement(i, *element);
3989 }
3990 }
3991 ZoneList<Expression*>* args = new ZoneList<Expression*>(2);
3992 args->Add(new Literal(type));
3993 args->Add(new Literal(array));
3994 return new Throw(new CallRuntime(constructor, NULL, args),
3995 scanner().location().beg_pos);
3996 }
3997
3998
3999 // ----------------------------------------------------------------------------
4000 // Regular expressions
4001
4002
// Sets up a parser over the characters of |in|. The first error message,
// if any, is stored through |error| (see ReportError). |multiline| selects
// line-based rather than input-based assertions for '^' and '$'.
RegExpParser::RegExpParser(FlatStringReader* in,
                           Handle<String>* error,
                           bool multiline)
  : current_(kEndMarker),
    has_more_(true),
    multiline_(multiline),
    next_pos_(0),
    in_(in),
    error_(error),
    simple_(false),
    contains_anchor_(false),
    captures_(NULL),
    is_scanned_for_captures_(false),
    capture_count_(0),
    failed_(false) {
  // Load the first input character into current_ (or mark the end of input
  // if the pattern is empty).
  Advance(1);
}
4020
4021
Next()4022 uc32 RegExpParser::Next() {
4023 if (has_next()) {
4024 return in()->Get(next_pos_);
4025 } else {
4026 return kEndMarker;
4027 }
4028 }
4029
4030
Advance()4031 void RegExpParser::Advance() {
4032 if (next_pos_ < in()->length()) {
4033 StackLimitCheck check;
4034 if (check.HasOverflowed()) {
4035 ReportError(CStrVector(Top::kStackOverflowMessage));
4036 } else if (Zone::excess_allocation()) {
4037 ReportError(CStrVector("Regular expression too large"));
4038 } else {
4039 current_ = in()->Get(next_pos_);
4040 next_pos_++;
4041 }
4042 } else {
4043 current_ = kEndMarker;
4044 has_more_ = false;
4045 }
4046 }
4047
4048
Reset(int pos)4049 void RegExpParser::Reset(int pos) {
4050 next_pos_ = pos;
4051 Advance();
4052 }
4053
4054
Advance(int dist)4055 void RegExpParser::Advance(int dist) {
4056 for (int i = 0; i < dist; i++)
4057 Advance();
4058 }
4059
4060
// Returns the simple_ flag, which ParsePattern() sets when the whole
// pattern parsed to a single atom as long as the input.
bool RegExpParser::simple() {
  return simple_;
}
4064
ReportError(Vector<const char> message)4065 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
4066 failed_ = true;
4067 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED);
4068 // Zip to the end to make sure the no more input is read.
4069 current_ = kEndMarker;
4070 next_pos_ = in()->length();
4071 return NULL;
4072 }
4073
4074
4075 // Pattern ::
4076 // Disjunction
ParsePattern()4077 RegExpTree* RegExpParser::ParsePattern() {
4078 RegExpTree* result = ParseDisjunction(CHECK_FAILED);
4079 ASSERT(!has_more());
4080 // If the result of parsing is a literal string atom, and it has the
4081 // same length as the input, then the atom is identical to the input.
4082 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {
4083 simple_ = true;
4084 }
4085 return result;
4086 }
4087
4088
// Disjunction ::
//   Alternative
//   Alternative | Disjunction
// Alternative ::
//   [empty]
//   Term Alternative
// Term ::
//   Assertion
//   Atom
//   Atom Quantifier
//
// Parses the remaining input and returns the resulting tree, or NULL if an
// error was reported. Parenthesized subexpressions are handled without
// recursion: on '(' the current state is pushed onto a chain of
// RegExpParserState objects and on ')' it is popped again.
//
// Each loop iteration first parses one term (first switch) and then an
// optional quantifier for it (second switch). Terms that cannot take a
// quantifier (assertions, '|') `continue` straight to the next iteration;
// the others `break` out of the first switch into the quantifier parsing.
RegExpTree* RegExpParser::ParseDisjunction() {
  // Used to store current state while parsing subexpressions.
  RegExpParserState initial_state(NULL, INITIAL, 0);
  RegExpParserState* stored_state = &initial_state;
  // Cache the builder in a local variable for quick access.
  RegExpBuilder* builder = initial_state.builder();
  while (true) {
    switch (current()) {
    case kEndMarker:
      if (stored_state->IsSubexpression()) {
        // Inside a parenthesized group when hitting end of input.
        ReportError(CStrVector("Unterminated group") CHECK_FAILED);
      }
      ASSERT_EQ(INITIAL, stored_state->group_type());
      // Parsing completed successfully.
      return builder->ToRegExp();
    case ')': {
      if (!stored_state->IsSubexpression()) {
        ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
      }
      ASSERT_NE(INITIAL, stored_state->group_type());

      Advance();
      // End disjunction parsing and convert builder content to new single
      // regexp atom.
      RegExpTree* body = builder->ToRegExp();

      int end_capture_index = captures_started();

      int capture_index = stored_state->capture_index();
      SubexpressionType type = stored_state->group_type();

      // Restore previous state.
      stored_state = stored_state->previous_state();
      builder = stored_state->builder();

      // Build result of subexpression.
      if (type == CAPTURE) {
        RegExpCapture* capture = new RegExpCapture(body, capture_index);
        // Fill the slot that was reserved (as NULL) when the '(' was seen.
        captures_->at(capture_index - 1) = capture;
        body = capture;
      } else if (type != GROUPING) {
        ASSERT(type == POSITIVE_LOOKAHEAD || type == NEGATIVE_LOOKAHEAD);
        bool is_positive = (type == POSITIVE_LOOKAHEAD);
        // The lookahead is told how many captures were started inside it
        // and the capture index at which it began.
        body = new RegExpLookahead(body,
                                   is_positive,
                                   end_capture_index - capture_index,
                                   capture_index);
      }
      builder->AddAtom(body);
      break;
    }
    case '|': {
      Advance();
      builder->NewAlternative();
      continue;
    }
    case '*':
    case '+':
    case '?':
      // A quantifier at the start of a term has nothing to apply to.
      return ReportError(CStrVector("Nothing to repeat"));
    case '^': {
      Advance();
      if (multiline_) {
        builder->AddAssertion(
            new RegExpAssertion(RegExpAssertion::START_OF_LINE));
      } else {
        builder->AddAssertion(
            new RegExpAssertion(RegExpAssertion::START_OF_INPUT));
        set_contains_anchor();
      }
      continue;
    }
    case '$': {
      Advance();
      RegExpAssertion::Type type =
          multiline_ ? RegExpAssertion::END_OF_LINE :
                       RegExpAssertion::END_OF_INPUT;
      builder->AddAssertion(new RegExpAssertion(type));
      continue;
    }
    case '.': {
      Advance();
      // everything except \x0a, \x0d, \u2028 and \u2029
      ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
      CharacterRange::AddClassEscape('.', ranges);
      RegExpTree* atom = new RegExpCharacterClass(ranges, false);
      builder->AddAtom(atom);
      break;
    }
    case '(': {
      SubexpressionType type = CAPTURE;
      Advance();
      if (current() == '?') {
        switch (Next()) {
          case ':':
            type = GROUPING;
            break;
          case '=':
            type = POSITIVE_LOOKAHEAD;
            break;
          case '!':
            type = NEGATIVE_LOOKAHEAD;
            break;
          default:
            ReportError(CStrVector("Invalid group") CHECK_FAILED);
            break;
        }
        // Skip the '?' and the group-type character that follows it.
        Advance(2);
      } else {
        if (captures_ == NULL) {
          captures_ = new ZoneList<RegExpCapture*>(2);
        }
        if (captures_started() >= kMaxCaptures) {
          ReportError(CStrVector("Too many captures") CHECK_FAILED);
        }
        // Reserve the slot for this capture; it is filled in when the
        // matching ')' is reached.
        captures_->Add(NULL);
      }
      // Store current state and begin new disjunction parsing.
      stored_state = new RegExpParserState(stored_state,
                                           type,
                                           captures_started());
      builder = stored_state->builder();
      break;
    }
    case '[': {
      RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);
      builder->AddAtom(atom);
      break;
    }
    // Atom ::
    //   \ AtomEscape
    case '\\':
      // current() is the backslash and Next() the escaped character, so
      // most cases below consume both with Advance(2).
      switch (Next()) {
      case kEndMarker:
        return ReportError(CStrVector("\\ at end of pattern"));
      case 'b':
        Advance(2);
        builder->AddAssertion(
            new RegExpAssertion(RegExpAssertion::BOUNDARY));
        continue;
      case 'B':
        Advance(2);
        builder->AddAssertion(
            new RegExpAssertion(RegExpAssertion::NON_BOUNDARY));
        continue;
        // AtomEscape ::
        //   CharacterClassEscape
        //
        // CharacterClassEscape :: one of
        //   d D s S w W
      case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {
        uc32 c = Next();
        Advance(2);
        ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
        CharacterRange::AddClassEscape(c, ranges);
        RegExpTree* atom = new RegExpCharacterClass(ranges, false);
        builder->AddAtom(atom);
        break;
      }
      case '1': case '2': case '3': case '4': case '5': case '6':
      case '7': case '8': case '9': {
        int index = 0;
        if (ParseBackReferenceIndex(&index)) {
          RegExpCapture* capture = NULL;
          if (captures_ != NULL && index <= captures_->length()) {
            capture = captures_->at(index - 1);
          }
          // No capture object available for this index at this point
          // (not started yet, or still open): add an empty node instead
          // of a back reference.
          if (capture == NULL) {
            builder->AddEmpty();
            break;
          }
          RegExpTree* atom = new RegExpBackReference(capture);
          builder->AddAtom(atom);
          break;
        }
        // Not a back reference; ParseBackReferenceIndex has rewound to
        // the backslash.
        uc32 first_digit = Next();
        if (first_digit == '8' || first_digit == '9') {
          // Treat as identity escape
          builder->AddCharacter(first_digit);
          Advance(2);
          break;
        }
      }
      // FALLTHROUGH
      case '0': {
        // Skip the backslash so that the octal literal starts at the
        // current character.
        Advance();
        uc32 octal = ParseOctalLiteral();
        builder->AddCharacter(octal);
        break;
      }
      // ControlEscape :: one of
      //   f n r t v
      case 'f':
        Advance(2);
        builder->AddCharacter('\f');
        break;
      case 'n':
        Advance(2);
        builder->AddCharacter('\n');
        break;
      case 'r':
        Advance(2);
        builder->AddCharacter('\r');
        break;
      case 't':
        Advance(2);
        builder->AddCharacter('\t');
        break;
      case 'v':
        Advance(2);
        builder->AddCharacter('\v');
        break;
      case 'c': {
        Advance(2);
        uc32 control = ParseControlLetterEscape();
        builder->AddCharacter(control);
        break;
      }
      case 'x': {
        Advance(2);
        uc32 value;
        if (ParseHexEscape(2, &value)) {
          builder->AddCharacter(value);
        } else {
          // Not followed by two hex digits: treat "\x" as a literal 'x'.
          builder->AddCharacter('x');
        }
        break;
      }
      case 'u': {
        Advance(2);
        uc32 value;
        if (ParseHexEscape(4, &value)) {
          builder->AddCharacter(value);
        } else {
          // Not followed by four hex digits: treat "\u" as a literal 'u'.
          builder->AddCharacter('u');
        }
        break;
      }
      default:
        // Identity escape.
        builder->AddCharacter(Next());
        Advance(2);
        break;
      }
      break;
    case '{': {
      // A well-formed interval quantifier here has no term to apply to.
      // If it does not parse as one, the position has been reset and the
      // '{' is added as an ordinary character below.
      int dummy;
      if (ParseIntervalQuantifier(&dummy, &dummy)) {
        ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
      }
      // fallthrough
    }
    default:
      builder->AddCharacter(current());
      Advance();
      break;
    }  // end switch(current())

    // A quantifiable term has just been added; look for a quantifier.
    int min;
    int max;
    switch (current()) {
    // QuantifierPrefix ::
    //   *
    //   +
    //   ?
    //   {
    case '*':
      min = 0;
      max = RegExpTree::kInfinity;
      Advance();
      break;
    case '+':
      min = 1;
      max = RegExpTree::kInfinity;
      Advance();
      break;
    case '?':
      min = 0;
      max = 1;
      Advance();
      break;
    case '{':
      if (ParseIntervalQuantifier(&min, &max)) {
        if (max < min) {
          ReportError(CStrVector("numbers out of order in {} quantifier.")
                      CHECK_FAILED);
        }
        break;
      } else {
        // Not a quantifier; the '{' is handled as a term by the next
        // iteration.
        continue;
      }
    default:
      continue;
    }
    // A '?' directly after the quantifier makes it non-greedy.
    bool is_greedy = true;
    if (current() == '?') {
      is_greedy = false;
      Advance();
    }
    builder->AddQuantifierToAtom(min, max, is_greedy);
  }
}
4402
4403 class SourceCharacter {
4404 public:
Is(uc32 c)4405 static bool Is(uc32 c) {
4406 switch (c) {
4407 // case ']': case '}':
4408 // In spidermonkey and jsc these are treated as source characters
4409 // so we do too.
4410 case '^': case '$': case '\\': case '.': case '*': case '+':
4411 case '?': case '(': case ')': case '[': case '{': case '|':
4412 case RegExpParser::kEndMarker:
4413 return false;
4414 default:
4415 return true;
4416 }
4417 }
4418 };
4419
4420
// File-local predicate object wrapping SourceCharacter::Is.
// NOTE(review): unibrow::Predicate presumably caches results per character;
// its definition is outside this chunk -- confirm.
static unibrow::Predicate<SourceCharacter> source_character;


// Returns whether |c| is a regexp source character, as decided by the
// source_character predicate.
static inline bool IsSourceCharacter(uc32 c) {
  return source_character.get(c);
}
4427
#ifdef DEBUG
// Currently only used in an ASSERT.
// Returns true if |c| is one of the character class escape letters
// d, D, s, S, w or W.
static bool IsSpecialClassEscape(uc32 c) {
  return c == 'd' || c == 'D' ||
         c == 's' || c == 'S' ||
         c == 'w' || c == 'W';
}
#endif
4441
4442
4443 // In order to know whether an escape is a backreference or not we have to scan
4444 // the entire regexp and find the number of capturing parentheses. However we
4445 // don't want to scan the regexp twice unless it is necessary. This mini-parser
4446 // is called when needed. It can see the difference between capturing and
4447 // noncapturing parentheses and can skip character classes and backslash-escaped
4448 // characters.
ScanForCaptures()4449 void RegExpParser::ScanForCaptures() {
4450 // Start with captures started previous to current position
4451 int capture_count = captures_started();
4452 // Add count of captures after this position.
4453 int n;
4454 while ((n = current()) != kEndMarker) {
4455 Advance();
4456 switch (n) {
4457 case '\\':
4458 Advance();
4459 break;
4460 case '[': {
4461 int c;
4462 while ((c = current()) != kEndMarker) {
4463 Advance();
4464 if (c == '\\') {
4465 Advance();
4466 } else {
4467 if (c == ']') break;
4468 }
4469 }
4470 break;
4471 }
4472 case '(':
4473 if (current() != '?') capture_count++;
4474 break;
4475 }
4476 }
4477 capture_count_ = capture_count;
4478 is_scanned_for_captures_ = true;
4479 }
4480
4481
ParseBackReferenceIndex(int * index_out)4482 bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
4483 ASSERT_EQ('\\', current());
4484 ASSERT('1' <= Next() && Next() <= '9');
4485 // Try to parse a decimal literal that is no greater than the total number
4486 // of left capturing parentheses in the input.
4487 int start = position();
4488 int value = Next() - '0';
4489 Advance(2);
4490 while (true) {
4491 uc32 c = current();
4492 if (IsDecimalDigit(c)) {
4493 value = 10 * value + (c - '0');
4494 if (value > kMaxCaptures) {
4495 Reset(start);
4496 return false;
4497 }
4498 Advance();
4499 } else {
4500 break;
4501 }
4502 }
4503 if (value > captures_started()) {
4504 if (!is_scanned_for_captures_) {
4505 int saved_position = position();
4506 ScanForCaptures();
4507 Reset(saved_position);
4508 }
4509 if (value > capture_count_) {
4510 Reset(start);
4511 return false;
4512 }
4513 }
4514 *index_out = value;
4515 return true;
4516 }
4517
4518
4519 // QuantifierPrefix ::
4520 // { DecimalDigits }
4521 // { DecimalDigits , }
4522 // { DecimalDigits , DecimalDigits }
4523 //
4524 // Returns true if parsing succeeds, and set the min_out and max_out
4525 // values. Values are truncated to RegExpTree::kInfinity if they overflow.
ParseIntervalQuantifier(int * min_out,int * max_out)4526 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
4527 ASSERT_EQ(current(), '{');
4528 int start = position();
4529 Advance();
4530 int min = 0;
4531 if (!IsDecimalDigit(current())) {
4532 Reset(start);
4533 return false;
4534 }
4535 while (IsDecimalDigit(current())) {
4536 int next = current() - '0';
4537 if (min > (RegExpTree::kInfinity - next) / 10) {
4538 // Overflow. Skip past remaining decimal digits and return -1.
4539 do {
4540 Advance();
4541 } while (IsDecimalDigit(current()));
4542 min = RegExpTree::kInfinity;
4543 break;
4544 }
4545 min = 10 * min + next;
4546 Advance();
4547 }
4548 int max = 0;
4549 if (current() == '}') {
4550 max = min;
4551 Advance();
4552 } else if (current() == ',') {
4553 Advance();
4554 if (current() == '}') {
4555 max = RegExpTree::kInfinity;
4556 Advance();
4557 } else {
4558 while (IsDecimalDigit(current())) {
4559 int next = current() - '0';
4560 if (max > (RegExpTree::kInfinity - next) / 10) {
4561 do {
4562 Advance();
4563 } while (IsDecimalDigit(current()));
4564 max = RegExpTree::kInfinity;
4565 break;
4566 }
4567 max = 10 * max + next;
4568 Advance();
4569 }
4570 if (current() != '}') {
4571 Reset(start);
4572 return false;
4573 }
4574 Advance();
4575 }
4576 } else {
4577 Reset(start);
4578 return false;
4579 }
4580 *min_out = min;
4581 *max_out = max;
4582 return true;
4583 }
4584
4585
4586 // Upper and lower case letters differ by one bit.
4587 STATIC_CHECK(('a' ^ 'A') == 0x20);
4588
ParseControlLetterEscape()4589 uc32 RegExpParser::ParseControlLetterEscape() {
4590 if (!has_more())
4591 return 'c';
4592 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters.
4593 if (letter < 'A' || 'Z' < letter) {
4594 // Non-spec error-correction: "\c" followed by non-control letter is
4595 // interpreted as an IdentityEscape of 'c'.
4596 return 'c';
4597 }
4598 Advance();
4599 return letter & 0x1f; // Remainder modulo 32, per specification.
4600 }
4601
4602
ParseOctalLiteral()4603 uc32 RegExpParser::ParseOctalLiteral() {
4604 ASSERT('0' <= current() && current() <= '7');
4605 // For compatibility with some other browsers (not all), we parse
4606 // up to three octal digits with a value below 256.
4607 uc32 value = current() - '0';
4608 Advance();
4609 if ('0' <= current() && current() <= '7') {
4610 value = value * 8 + current() - '0';
4611 Advance();
4612 if (value < 32 && '0' <= current() && current() <= '7') {
4613 value = value * 8 + current() - '0';
4614 Advance();
4615 }
4616 }
4617 return value;
4618 }
4619
4620
ParseHexEscape(int length,uc32 * value)4621 bool RegExpParser::ParseHexEscape(int length, uc32 *value) {
4622 int start = position();
4623 uc32 val = 0;
4624 bool done = false;
4625 for (int i = 0; !done; i++) {
4626 uc32 c = current();
4627 int d = HexValue(c);
4628 if (d < 0) {
4629 Reset(start);
4630 return false;
4631 }
4632 val = val * 16 + d;
4633 Advance();
4634 if (i == length - 1) {
4635 done = true;
4636 }
4637 }
4638 *value = val;
4639 return true;
4640 }
4641
4642
ParseClassCharacterEscape()4643 uc32 RegExpParser::ParseClassCharacterEscape() {
4644 ASSERT(current() == '\\');
4645 ASSERT(has_next() && !IsSpecialClassEscape(Next()));
4646 Advance();
4647 switch (current()) {
4648 case 'b':
4649 Advance();
4650 return '\b';
4651 // ControlEscape :: one of
4652 // f n r t v
4653 case 'f':
4654 Advance();
4655 return '\f';
4656 case 'n':
4657 Advance();
4658 return '\n';
4659 case 'r':
4660 Advance();
4661 return '\r';
4662 case 't':
4663 Advance();
4664 return '\t';
4665 case 'v':
4666 Advance();
4667 return '\v';
4668 case 'c':
4669 Advance();
4670 return ParseControlLetterEscape();
4671 case '0': case '1': case '2': case '3': case '4': case '5':
4672 case '6': case '7':
4673 // For compatibility, we interpret a decimal escape that isn't
4674 // a back reference (and therefore either \0 or not valid according
4675 // to the specification) as a 1..3 digit octal character code.
4676 return ParseOctalLiteral();
4677 case 'x': {
4678 Advance();
4679 uc32 value;
4680 if (ParseHexEscape(2, &value)) {
4681 return value;
4682 }
4683 // If \x is not followed by a two-digit hexadecimal, treat it
4684 // as an identity escape.
4685 return 'x';
4686 }
4687 case 'u': {
4688 Advance();
4689 uc32 value;
4690 if (ParseHexEscape(4, &value)) {
4691 return value;
4692 }
4693 // If \u is not followed by a four-digit hexadecimal, treat it
4694 // as an identity escape.
4695 return 'u';
4696 }
4697 default: {
4698 // Extended identity escape. We accept any character that hasn't
4699 // been matched by a more specific case, not just the subset required
4700 // by the ECMAScript specification.
4701 uc32 result = current();
4702 Advance();
4703 return result;
4704 }
4705 }
4706 return 0;
4707 }
4708
4709
ParseClassAtom(uc16 * char_class)4710 CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
4711 ASSERT_EQ(0, *char_class);
4712 uc32 first = current();
4713 if (first == '\\') {
4714 switch (Next()) {
4715 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
4716 *char_class = Next();
4717 Advance(2);
4718 return CharacterRange::Singleton(0); // Return dummy value.
4719 }
4720 case kEndMarker:
4721 return ReportError(CStrVector("\\ at end of pattern"));
4722 default:
4723 uc32 c = ParseClassCharacterEscape(CHECK_FAILED);
4724 return CharacterRange::Singleton(c);
4725 }
4726 } else {
4727 Advance();
4728 return CharacterRange::Singleton(first);
4729 }
4730 }
4731
4732
ParseCharacterClass()4733 RegExpTree* RegExpParser::ParseCharacterClass() {
4734 static const char* kUnterminated = "Unterminated character class";
4735 static const char* kRangeOutOfOrder = "Range out of order in character class";
4736
4737 ASSERT_EQ(current(), '[');
4738 Advance();
4739 bool is_negated = false;
4740 if (current() == '^') {
4741 is_negated = true;
4742 Advance();
4743 }
4744 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
4745 while (has_more() && current() != ']') {
4746 uc16 char_class = 0;
4747 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
4748 if (char_class) {
4749 CharacterRange::AddClassEscape(char_class, ranges);
4750 continue;
4751 }
4752 if (current() == '-') {
4753 Advance();
4754 if (current() == kEndMarker) {
4755 // If we reach the end we break out of the loop and let the
4756 // following code report an error.
4757 break;
4758 } else if (current() == ']') {
4759 ranges->Add(first);
4760 ranges->Add(CharacterRange::Singleton('-'));
4761 break;
4762 }
4763 CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED);
4764 if (char_class) {
4765 ranges->Add(first);
4766 ranges->Add(CharacterRange::Singleton('-'));
4767 CharacterRange::AddClassEscape(char_class, ranges);
4768 continue;
4769 }
4770 if (first.from() > next.to()) {
4771 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);
4772 }
4773 ranges->Add(CharacterRange::Range(first.from(), next.to()));
4774 } else {
4775 ranges->Add(first);
4776 }
4777 }
4778 if (!has_more()) {
4779 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);
4780 }
4781 Advance();
4782 if (ranges->length() == 0) {
4783 ranges->Add(CharacterRange::Everything());
4784 is_negated = !is_negated;
4785 }
4786 return new RegExpCharacterClass(ranges, is_negated);
4787 }
4788
4789
4790 // ----------------------------------------------------------------------------
4791 // The Parser interface.
4792
4793 // MakeAST() is just a wrapper for the corresponding Parser calls
4794 // so we don't have to expose the entire Parser class in the .h file.
4795
// File-local override for natives syntax. PreParse() and MakeAST() allow
// natives syntax when this is true, in addition to FLAG_allow_natives_syntax
// and Bootstrapper::IsActive(). MakeLazyAST() sets it to true while it
// constructs its parser and then restores the previous value.
static bool always_allow_natives_syntax = false;
4797
4798
~ParserMessage()4799 ParserMessage::~ParserMessage() {
4800 for (int i = 0; i < args().length(); i++)
4801 DeleteArray(args()[i]);
4802 DeleteArray(args().start());
4803 }
4804
4805
// Disposes of store_ when the script data object is destroyed.
// NOTE(review): presumably this frees the backing storage handed to the
// constructor (PreParse() passes in a cloned vector) - confirm against
// the Dispose() implementation.
ScriptDataImpl::~ScriptDataImpl() {
  store_.Dispose();
}
4809
4810
// Returns the length reported by store_, the container whose contents are
// exposed through Data().
int ScriptDataImpl::Length() {
  return store_.length();
}
4814
4815
// Returns a pointer to the start of store_. The object disposes of store_
// in its destructor, so the pointer should not be used after that.
unsigned* ScriptDataImpl::Data() {
  return store_.start();
}
4819
4820
PreParse(Handle<String> source,unibrow::CharacterStream * stream,v8::Extension * extension)4821 ScriptDataImpl* PreParse(Handle<String> source,
4822 unibrow::CharacterStream* stream,
4823 v8::Extension* extension) {
4824 Handle<Script> no_script;
4825 bool allow_natives_syntax =
4826 always_allow_natives_syntax ||
4827 FLAG_allow_natives_syntax ||
4828 Bootstrapper::IsActive();
4829 PreParser parser(no_script, allow_natives_syntax, extension);
4830 if (!parser.PreParseProgram(source, stream)) return NULL;
4831 // The list owns the backing store so we need to clone the vector.
4832 // That way, the result will be exactly the right size rather than
4833 // the expected 50% too large.
4834 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone();
4835 return new ScriptDataImpl(store);
4836 }
4837
4838
ParseRegExp(FlatStringReader * input,bool multiline,RegExpCompileData * result)4839 bool ParseRegExp(FlatStringReader* input,
4840 bool multiline,
4841 RegExpCompileData* result) {
4842 ASSERT(result != NULL);
4843 // Make sure we have a stack guard.
4844 StackGuard guard;
4845 RegExpParser parser(input, &result->error, multiline);
4846 RegExpTree* tree = parser.ParsePattern();
4847 if (parser.failed()) {
4848 ASSERT(tree == NULL);
4849 ASSERT(!result->error.is_null());
4850 } else {
4851 ASSERT(tree != NULL);
4852 ASSERT(result->error.is_null());
4853 result->tree = tree;
4854 int capture_count = parser.captures_started();
4855 result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
4856 result->contains_anchor = parser.contains_anchor();
4857 result->capture_count = capture_count;
4858 }
4859 return !parser.failed();
4860 }
4861
4862
MakeAST(bool compile_in_global_context,Handle<Script> script,v8::Extension * extension,ScriptDataImpl * pre_data)4863 FunctionLiteral* MakeAST(bool compile_in_global_context,
4864 Handle<Script> script,
4865 v8::Extension* extension,
4866 ScriptDataImpl* pre_data) {
4867 bool allow_natives_syntax =
4868 always_allow_natives_syntax ||
4869 FLAG_allow_natives_syntax ||
4870 Bootstrapper::IsActive();
4871 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data);
4872 if (pre_data != NULL && pre_data->has_error()) {
4873 Scanner::Location loc = pre_data->MessageLocation();
4874 const char* message = pre_data->BuildMessage();
4875 Vector<const char*> args = pre_data->BuildArgs();
4876 parser.ReportMessageAt(loc, message, args);
4877 DeleteArray(message);
4878 for (int i = 0; i < args.length(); i++)
4879 DeleteArray(args[i]);
4880 DeleteArray(args.start());
4881 return NULL;
4882 }
4883 Handle<String> source = Handle<String>(String::cast(script->source()));
4884 SafeStringInputBuffer input(source.location());
4885 FunctionLiteral* result = parser.ParseProgram(source,
4886 &input, compile_in_global_context);
4887 return result;
4888 }
4889
4890
MakeLazyAST(Handle<Script> script,Handle<String> name,int start_position,int end_position,bool is_expression)4891 FunctionLiteral* MakeLazyAST(Handle<Script> script,
4892 Handle<String> name,
4893 int start_position,
4894 int end_position,
4895 bool is_expression) {
4896 bool allow_natives_syntax_before = always_allow_natives_syntax;
4897 always_allow_natives_syntax = true;
4898 AstBuildingParser parser(script, true, NULL, NULL); // always allow
4899 always_allow_natives_syntax = allow_natives_syntax_before;
4900 // Parse the function by pulling the function source from the script source.
4901 Handle<String> script_source(String::cast(script->source()));
4902 FunctionLiteral* result =
4903 parser.ParseLazy(SubString(script_source, start_position, end_position),
4904 name,
4905 start_position,
4906 is_expression);
4907 return result;
4908 }
4909
4910
4911 #undef NEW
4912
4913
4914 } } // namespace v8::internal
4915