// Copyright 2012 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "v8.h"

#include "api.h"
#include "ast.h"
#include "bootstrapper.h"
#include "char-predicates-inl.h"
#include "codegen.h"
#include "compiler.h"
#include "func-name-inferrer.h"
#include "messages.h"
#include "parser.h"
#include "platform.h"
#include "preparser.h"
#include "runtime.h"
#include "scanner-character-streams.h"
#include "scopeinfo.h"
#include "string-stream.h"

namespace v8 {
namespace internal {

// PositionStack is used for on-stack allocation of token positions for
// new expressions. Please look at ParseNewExpression.
//
// The stack is an intrusive singly-linked list: every Element pushes itself
// onto the stack from its constructor and remembers the previous top.
class PositionStack {
 public:
  explicit PositionStack(bool* ok) : top_(NULL), ok_(ok) {}
  // Unless parsing failed (*ok_ is false), all positions must have been
  // popped by the time the stack is destroyed.
  ~PositionStack() { ASSERT(!*ok_ || is_empty()); }

  class Element {
   public:
    // Links this element in as the new top of |stack|.
    Element(PositionStack* stack, int value) {
      previous_ = stack->top();
      value_ = value;
      stack->set_top(this);
    }

   private:
    Element* previous() { return previous_; }
    int value() { return value_; }
    friend class PositionStack;
    Element* previous_;
    int value_;
  };

  bool is_empty() { return top_ == NULL; }

  // Unlinks the top element and returns its value. Must not be called on an
  // empty stack.
  int pop() {
    ASSERT(!is_empty());
    int result = top_->value();
    top_ = top_->previous();
    return result;
  }

 private:
  Element* top() { return top_; }
  void set_top(Element* value) { top_ = value; }
  Element* top_;
  bool* ok_;
};


// Creates an empty builder that allocates in the current isolate's zone.
RegExpBuilder::RegExpBuilder()
    : zone_(Isolate::Current()->zone()),
      pending_empty_(false),
      characters_(NULL),
      terms_(),
      alternatives_()
#ifdef DEBUG
    , last_added_(ADD_NONE)
#endif
  {}


// Turns the pending run of literal characters (if any) into a single
// RegExpAtom and appends it to the pending text elements.
void RegExpBuilder::FlushCharacters() {
  pending_empty_ = false;
  if (characters_ != NULL) {
    RegExpTree* atom = new(zone()) RegExpAtom(characters_->ToConstVector());
    characters_ = NULL;
    text_.Add(atom);
    LAST(ADD_ATOM);
  }
}


// Moves the pending text elements into the term list: nothing if there are
// none, the element itself if there is exactly one, otherwise a RegExpText
// that all elements have been appended to.
void RegExpBuilder::FlushText() {
  FlushCharacters();
  int num_text = text_.length();
  if (num_text == 0) {
    return;
  } else if (num_text == 1) {
    terms_.Add(text_.last());
  } else {
    RegExpText* text = new(zone()) RegExpText();
    for (int i = 0; i < num_text; i++)
      text_.Get(i)->AppendToText(text);
    terms_.Add(text);
  }
  text_.Clear();
}


// Appends a literal character to the pending character run, creating the
// run on first use.
void RegExpBuilder::AddCharacter(uc16 c) {
  pending_empty_ = false;
  if (characters_ == NULL) {
    characters_ = new(zone()) ZoneList<uc16>(4);
  }
  characters_->Add(c);
  LAST(ADD_CHAR);
}


// Records that the last thing added was empty, so that a following
// quantifier is dropped (see AddQuantifierToAtom).
void RegExpBuilder::AddEmpty() {
  pending_empty_ = true;
}


// Adds |term| to the current alternative. Empty terms are only recorded via
// AddEmpty(); text elements are collected in text_, everything else goes
// straight to terms_ after flushing pending text.
void RegExpBuilder::AddAtom(RegExpTree* term) {
  if (term->IsEmpty()) {
    AddEmpty();
    return;
  }
  if (term->IsTextElement()) {
    FlushCharacters();
    text_.Add(term);
  } else {
    FlushText();
    terms_.Add(term);
  }
  LAST(ADD_ATOM);
}


// Adds an assertion node to the current alternative after flushing any
// pending text.
void RegExpBuilder::AddAssertion(RegExpTree* assert) {
  FlushText();
  terms_.Add(assert);
  LAST(ADD_ASSERT);
}


// Finishes the current alternative and starts a new one.
void RegExpBuilder::NewAlternative() {
  FlushTerms();
}


// Packs the collected terms into one alternative (the empty instance, the
// single term, or a RegExpAlternative) and appends it to alternatives_.
void RegExpBuilder::FlushTerms() {
  FlushText();
  int num_terms = terms_.length();
  RegExpTree* alternative;
  if (num_terms == 0) {
    alternative = RegExpEmpty::GetInstance();
  } else if (num_terms == 1) {
    alternative = terms_.last();
  } else {
    alternative = new(zone()) RegExpAlternative(terms_.GetList());
  }
  alternatives_.Add(alternative);
  terms_.Clear();
  LAST(ADD_NONE);
}


// Finishes the last alternative and returns the tree for everything added so
// far: the single alternative if there is only one, otherwise a
// RegExpDisjunction of all alternatives.
RegExpTree* RegExpBuilder::ToRegExp() {
  FlushTerms();
  int num_alternatives = alternatives_.length();
  if (num_alternatives == 0) {
    return RegExpEmpty::GetInstance();
  }
  if (num_alternatives == 1) {
    return alternatives_.last();
  }
  return new(zone()) RegExpDisjunction(alternatives_.GetList());
}


// Wraps the most recently added atom in a RegExpQuantifier(min, max, type).
// Must be called immediately after adding an atom or a character.
//  - A pending empty atom just swallows the quantifier.
//  - With a pending character run, only the last character is quantified;
//    the preceding characters become their own atom.
//  - A term that can only match the empty string (max_match() == 0) is
//    dropped when min == 0 and otherwise kept without a quantifier.
void RegExpBuilder::AddQuantifierToAtom(int min,
                                        int max,
                                        RegExpQuantifier::Type type) {
  if (pending_empty_) {
    pending_empty_ = false;
    return;
  }
  RegExpTree* atom;
  if (characters_ != NULL) {
    ASSERT(last_added_ == ADD_CHAR);
    // Last atom was character.
    Vector<const uc16> char_vector = characters_->ToConstVector();
    int num_chars = char_vector.length();
    if (num_chars > 1) {
      Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
      text_.Add(new(zone()) RegExpAtom(prefix));
      char_vector = char_vector.SubVector(num_chars - 1, num_chars);
    }
    characters_ = NULL;
    atom = new(zone()) RegExpAtom(char_vector);
    FlushText();
  } else if (text_.length() > 0) {
    ASSERT(last_added_ == ADD_ATOM);
    atom = text_.RemoveLast();
    FlushText();
  } else if (terms_.length() > 0) {
    ASSERT(last_added_ == ADD_ATOM);
    atom = terms_.RemoveLast();
    if (atom->max_match() == 0) {
      // Guaranteed to only match an empty string.
      LAST(ADD_TERM);
      if (min == 0) {
        return;
      }
      terms_.Add(atom);
      return;
    }
  } else {
    // Only call immediately after adding an atom or character!
    UNREACHABLE();
    return;
  }
  terms_.Add(new(zone()) RegExpQuantifier(min, max, type, atom));
  LAST(ADD_TERM);
}


// Returns the symbol for the scanner's current literal. |symbol_id| is the
// identifier from the preparser symbol data; ids outside the cache are
// looked up directly in the factory without being cached.
Handle<String> Parser::LookupSymbol(int symbol_id) {
  // Length of symbol cache is the number of identified symbols.
  // If we are larger than that, or negative, it's not a cached symbol.
  // This might also happen if there is no preparser symbol data, even
  // if there is some preparser data.
  // The unsigned comparison rejects negative ids and too-large ids at once.
  if (static_cast<unsigned>(symbol_id)
      >= static_cast<unsigned>(symbol_cache_.length())) {
    if (scanner().is_literal_ascii()) {
      return isolate()->factory()->LookupAsciiSymbol(
          scanner().literal_ascii_string());
    } else {
      return isolate()->factory()->LookupTwoByteSymbol(
          scanner().literal_utf16_string());
    }
  }
  return LookupCachedSymbol(symbol_id);
}


// Returns the cached symbol for |symbol_id|, filling the cache entry from
// the scanner's current literal on first use. Cache hits bump the
// total_preparse_symbols_skipped counter.
Handle<String> Parser::LookupCachedSymbol(int symbol_id) {
  // Make sure the cache is large enough to hold the symbol identifier.
  if (symbol_cache_.length() <= symbol_id) {
    // Increase length to index + 1.
    symbol_cache_.AddBlock(Handle<String>::null(),
                           symbol_id + 1 - symbol_cache_.length());
  }
  Handle<String> result = symbol_cache_.at(symbol_id);
  if (result.is_null()) {
    if (scanner().is_literal_ascii()) {
      result = isolate()->factory()->LookupAsciiSymbol(
          scanner().literal_ascii_string());
    } else {
      result = isolate()->factory()->LookupTwoByteSymbol(
          scanner().literal_utf16_string());
    }
    symbol_cache_.at(symbol_id) = result;
    return result;
  }
  isolate()->counters()->total_preparse_symbols_skipped()->Increment();
  return result;
}


// Returns the next function entry if it starts at |start| and advances past
// it; otherwise returns a default-constructed FunctionEntry and leaves the
// read position unchanged.
FunctionEntry ScriptDataImpl::GetFunctionEntry(int start) {
  // The current pre-data entry must be a FunctionEntry with the given
  // start position.
  if ((function_index_ + FunctionEntry::kSize <= store_.length())
      && (static_cast<int>(store_[function_index_]) == start)) {
    int index = function_index_;
    function_index_ += FunctionEntry::kSize;
    return FunctionEntry(store_.SubVector(index,
                                          index + FunctionEntry::kSize));
  }
  return FunctionEntry();
}


// Reads the next number from the symbol data stream.
int ScriptDataImpl::GetSymbolIdentifier() {
  return ReadNumber(&symbol_data_);
}


// Validates the preparse data before it is used. Returns false if the
// header, the encoded error message or the function/symbol sizes are
// inconsistent with the length of the store.
bool ScriptDataImpl::SanityCheck() {
  // Check that the header data is valid and doesn't point to positions
  // outside the store.
  if (store_.length() < PreparseDataConstants::kHeaderSize) return false;
  if (magic() != PreparseDataConstants::kMagicNumber) return false;
  if (version() != PreparseDataConstants::kCurrentVersion) return false;
  if (has_error()) {
    // Extra sane sanity check for error message encoding.
    if (store_.length() <= PreparseDataConstants::kHeaderSize
                         + PreparseDataConstants::kMessageTextPos) {
      return false;
    }
    if (Read(PreparseDataConstants::kMessageStartPos) >
        Read(PreparseDataConstants::kMessageEndPos)) {
      return false;
    }
    unsigned arg_count = Read(PreparseDataConstants::kMessageArgCountPos);
    int pos = PreparseDataConstants::kMessageTextPos;
    // Walks arg_count + 1 length-prefixed strings: the message text followed
    // by its arguments.
    for (unsigned int i = 0; i <= arg_count; i++) {
      if (store_.length() <= PreparseDataConstants::kHeaderSize + pos) {
        return false;
      }
      int length = static_cast<int>(Read(pos));
      if (length < 0) return false;
      pos += 1 + length;
    }
    if (store_.length() < PreparseDataConstants::kHeaderSize + pos) {
      return false;
    }
    return true;
  }
  // Check that the space allocated for function entries is sane.
  int functions_size =
      static_cast<int>(store_[PreparseDataConstants::kFunctionsSizeOffset]);
  if (functions_size < 0) return false;
  if (functions_size % FunctionEntry::kSize != 0) return false;
  // Check that the count of symbols is non-negative.
  int symbol_count =
      static_cast<int>(store_[PreparseDataConstants::kSymbolCountOffset]);
  if (symbol_count < 0) return false;
  // Check that the total size has room for header and function entries.
  int minimum_size =
      PreparseDataConstants::kHeaderSize + functions_size;
  if (store_.length() < minimum_size) return false;
  return true;
}


// Decodes a length-prefixed string: start[0] holds the character count and
// each following word holds one character. Returns a newly allocated,
// NUL-terminated copy and, if |chars| is non-NULL, stores the length there.
// NOTE(review): the buffer comes from NewArray; presumably the caller is
// responsible for releasing it - confirm against callers.
const char* ScriptDataImpl::ReadString(unsigned* start, int* chars) {
  int length = start[0];
  char* result = NewArray<char>(length + 1);
  for (int i = 0; i < length; i++) {
    result[i] = start[i + 1];
  }
  result[length] = '\0';
  if (chars != NULL) *chars = length;
  return result;
}


// Returns the source range stored with the recorded error message.
Scanner::Location ScriptDataImpl::MessageLocation() {
  int beg_pos = Read(PreparseDataConstants::kMessageStartPos);
  int end_pos = Read(PreparseDataConstants::kMessageEndPos);
  return Scanner::Location(beg_pos, end_pos);
}


// Returns a newly allocated copy of the recorded error message text.
const char* ScriptDataImpl::BuildMessage() {
  unsigned* start = ReadAddress(PreparseDataConstants::kMessageTextPos);
  return ReadString(start, NULL);
}


// Returns newly allocated copies of the recorded error message arguments,
// which are stored as length-prefixed strings right after the message text.
Vector<const char*> ScriptDataImpl::BuildArgs() {
  int arg_count = Read(PreparseDataConstants::kMessageArgCountPos);
  const char** array = NewArray<const char*>(arg_count);
  // Position after text found by skipping past length field and
  // length field content words.
  int pos = PreparseDataConstants::kMessageTextPos + 1
      + Read(PreparseDataConstants::kMessageTextPos);
  for (int i = 0; i < arg_count; i++) {
    int count = 0;
    array[i] = ReadString(ReadAddress(pos), &count);
    pos += count + 1;
  }
  return Vector<const char*>(array, arg_count);
}


// Reads the word at |position|, counted from the end of the header.
unsigned ScriptDataImpl::Read(int position) {
  return store_[PreparseDataConstants::kHeaderSize + position];
}


// Returns the address of the word at |position|, counted from the end of
// the header.
unsigned* ScriptDataImpl::ReadAddress(int position) {
  return &store_[PreparseDataConstants::kHeaderSize + position];
}


// Allocates a scope of the given type under |parent| in the parser's zone
// and initializes it.
Scope* Parser::NewScope(Scope* parent, ScopeType type) {
  Scope* result = new(zone()) Scope(parent, type);
  result->Initialize();
  return result;
}


// ----------------------------------------------------------------------------
// Target is a support class to facilitate manipulation of the
// Parser's target_stack_ (the stack of potential 'break' and
// 'continue' statement targets). Upon construction, a new target is
// added; it is removed upon destruction.

class Target BASE_EMBEDDED { public: Target(Target** variable, AstNode* node) : variable_(variable), node_(node), previous_(*variable) { *variable = this; } ~Target() { *variable_ = previous_; } Target* previous() { return previous_; } AstNode* node() { return node_; } private: Target** variable_; AstNode* node_; Target* previous_; }; class TargetScope BASE_EMBEDDED { public: explicit TargetScope(Target** variable) : variable_(variable), previous_(*variable) { *variable = NULL; } ~TargetScope() { *variable_ = previous_; } private: Target** variable_; Target* previous_; }; // ---------------------------------------------------------------------------- // FunctionState and BlockState together implement the parser's scope stack. // The parser's current scope is in top_scope_. The BlockState and // FunctionState constructors push on the scope stack and the destructors // pop. They are also used to hold the parser's per-function and per-block // state. class Parser::BlockState BASE_EMBEDDED { public: BlockState(Parser* parser, Scope* scope) : parser_(parser), outer_scope_(parser->top_scope_) { parser->top_scope_ = scope; } ~BlockState() { parser_->top_scope_ = outer_scope_; } private: Parser* parser_; Scope* outer_scope_; }; Parser::FunctionState::FunctionState(Parser* parser, Scope* scope, Isolate* isolate) : next_materialized_literal_index_(JSFunction::kLiteralsPrefixSize), next_handler_index_(0), expected_property_count_(0), only_simple_this_property_assignments_(false), this_property_assignments_(isolate->factory()->empty_fixed_array()), parser_(parser), outer_function_state_(parser->current_function_state_), outer_scope_(parser->top_scope_), saved_ast_node_id_(isolate->ast_node_id()), factory_(isolate) { parser->top_scope_ = scope; parser->current_function_state_ = this; isolate->set_ast_node_id(AstNode::kDeclarationsId + 1); } Parser::FunctionState::~FunctionState() { parser_->top_scope_ = outer_scope_; parser_->current_function_state_ = outer_function_state_; if 
(outer_function_state_ != NULL) { parser_->isolate()->set_ast_node_id(saved_ast_node_id_); } } // ---------------------------------------------------------------------------- // The CHECK_OK macro is a convenient macro to enforce error // handling for functions that may fail (by returning !*ok). // // CAUTION: This macro appends extra statements after a call, // thus it must never be used where only a single statement // is correct (e.g. an if statement branch w/o braces)! #define CHECK_OK ok); \ if (!*ok) return NULL; \ ((void)0 #define DUMMY ) // to make indentation work #undef DUMMY #define CHECK_FAILED /**/); \ if (failed_) return NULL; \ ((void)0 #define DUMMY ) // to make indentation work #undef DUMMY // ---------------------------------------------------------------------------- // Implementation of Parser Parser::Parser(Handle