// Copyright 2017 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_ASMJS_ASM_SCANNER_H_ #define V8_ASMJS_ASM_SCANNER_H_ #include #include #include #include "src/asmjs/asm-names.h" #include "src/base/logging.h" #include "src/base/strings.h" #include "src/common/globals.h" namespace v8 { namespace internal { class Utf16CharacterStream; // A custom scanner to extract the token stream needed to parse valid // asm.js: http://asmjs.org/spec/latest/ // This scanner intentionally avoids the portion of JavaScript lexing // that are not required to determine if code is valid asm.js code. // * Strings are disallowed except for 'use asm'. // * Only the subset of keywords needed to check asm.js invariants are // included. // * Identifiers are accumulated into local + global string tables // (for performance). class V8_EXPORT_PRIVATE AsmJsScanner { public: using token_t = int32_t; explicit AsmJsScanner(Utf16CharacterStream* stream); // Get current token. token_t Token() const { return token_; } // Get position of current token. size_t Position() const { return position_; } // Advance to the next token. void Next(); // Back up by one token. void Rewind(); // Get raw string for current identifier. Note that the returned string will // become invalid when the scanner advances, create a copy to preserve it. const std::string& GetIdentifierString() const { // Identifier strings don't work after a rewind. DCHECK(!rewind_); return identifier_string_; } // Check if we just passed a newline. bool IsPrecededByNewline() const { // Newline tracking doesn't work if you back up. DCHECK(!rewind_); return preceded_by_newline_; } #if DEBUG // Debug only method to go from a token back to its name. // Slow, only use for debugging. std::string Name(token_t token) const; #endif // Restores old position (token after that position). Note that it is not // allowed to rewind right after a seek, because previous tokens are unknown. void Seek(size_t pos); // Select whether identifiers are resolved in global or local scope, // and which scope new identifiers are added to. void EnterLocalScope() { in_local_scope_ = true; } void EnterGlobalScope() { in_local_scope_ = false; } // Drop all current local identifiers. void ResetLocals(); // Methods to check if a token is an identifier and which scope. bool IsLocal() const { return IsLocal(Token()); } bool IsGlobal() const { return IsGlobal(Token()); } static bool IsLocal(token_t token) { return token <= kLocalsStart; } static bool IsGlobal(token_t token) { return token >= kGlobalsStart; } // Methods to find the index position of an identifier (count starting from // 0 for each scope separately). static size_t LocalIndex(token_t token) { DCHECK(IsLocal(token)); return -(token - kLocalsStart); } static size_t GlobalIndex(token_t token) { DCHECK(IsGlobal(token)); return token - kGlobalsStart; } // Methods to check if the current token is a numeric literal considered an // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note // that numbers without a dot outside the [0 .. 2^32) range are errors. bool IsUnsigned() const { return Token() == kUnsigned; } uint32_t AsUnsigned() const { DCHECK(IsUnsigned()); return unsigned_value_; } bool IsDouble() const { return Token() == kDouble; } double AsDouble() const { DCHECK(IsDouble()); return double_value_; } // clang-format off enum { // [-10000-kMaxIdentifierCount, -10000) :: Local identifiers (counting // backwards) // [-10000 .. -1) :: Builtin tokens like keywords // (also includes some special // ones like end of input) // 0 .. 255 :: Single char tokens // 256 .. 256+kMaxIdentifierCount :: Global identifiers kLocalsStart = -10000, #define V(name, _junk1, _junk2, _junk3) kToken_##name, STDLIB_MATH_FUNCTION_LIST(V) STDLIB_ARRAY_TYPE_LIST(V) #undef V #define V(name, _junk1) kToken_##name, STDLIB_MATH_VALUE_LIST(V) #undef V #define V(name) kToken_##name, STDLIB_OTHER_LIST(V) KEYWORD_NAME_LIST(V) #undef V #define V(rawname, name) kToken_##name, LONG_SYMBOL_NAME_LIST(V) #undef V #define V(name, value, string_name) name = value, SPECIAL_TOKEN_LIST(V) #undef V kGlobalsStart = 256, }; // clang-format on static constexpr base::uc32 kEndOfInputU = static_cast(kEndOfInput); private: Utf16CharacterStream* stream_; token_t token_; token_t preceding_token_; token_t next_token_; // Only set when in {rewind} state. size_t position_; // Corresponds to {token} position. size_t preceding_position_; // Corresponds to {preceding_token} position. size_t next_position_; // Only set when in {rewind} state. bool rewind_; std::string identifier_string_; bool in_local_scope_; std::unordered_map local_names_; std::unordered_map global_names_; std::unordered_map property_names_; int global_count_; double double_value_; uint32_t unsigned_value_; bool preceded_by_newline_; // Consume multiple characters. void ConsumeIdentifier(base::uc32 ch); void ConsumeNumber(base::uc32 ch); bool ConsumeCComment(); void ConsumeCPPComment(); void ConsumeString(base::uc32 quote); void ConsumeCompareOrShift(base::uc32 ch); // Classify character categories. bool IsIdentifierStart(base::uc32 ch); bool IsIdentifierPart(base::uc32 ch); bool IsNumberStart(base::uc32 ch); }; } // namespace internal } // namespace v8 #endif // V8_ASMJS_ASM_SCANNER_H_