1 // Copyright 2017 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_ASMJS_ASM_SCANNER_H_ 6 #define V8_ASMJS_ASM_SCANNER_H_ 7 8 #include <memory> 9 #include <string> 10 #include <unordered_map> 11 12 #include "src/asmjs/asm-names.h" 13 #include "src/base/logging.h" 14 #include "src/base/strings.h" 15 #include "src/common/globals.h" 16 17 namespace v8 { 18 namespace internal { 19 20 class Utf16CharacterStream; 21 22 // A custom scanner to extract the token stream needed to parse valid 23 // asm.js: http://asmjs.org/spec/latest/ 24 // This scanner intentionally avoids the portion of JavaScript lexing 25 // that are not required to determine if code is valid asm.js code. 26 // * Strings are disallowed except for 'use asm'. 27 // * Only the subset of keywords needed to check asm.js invariants are 28 // included. 29 // * Identifiers are accumulated into local + global string tables 30 // (for performance). 31 class V8_EXPORT_PRIVATE AsmJsScanner { 32 public: 33 using token_t = int32_t; 34 35 explicit AsmJsScanner(Utf16CharacterStream* stream); 36 37 // Get current token. Token()38 token_t Token() const { return token_; } 39 // Get position of current token. Position()40 size_t Position() const { return position_; } 41 // Advance to the next token. 42 void Next(); 43 // Back up by one token. 44 void Rewind(); 45 46 // Get raw string for current identifier. Note that the returned string will 47 // become invalid when the scanner advances, create a copy to preserve it. GetIdentifierString()48 const std::string& GetIdentifierString() const { 49 // Identifier strings don't work after a rewind. 50 DCHECK(!rewind_); 51 return identifier_string_; 52 } 53 54 // Check if we just passed a newline. IsPrecededByNewline()55 bool IsPrecededByNewline() const { 56 // Newline tracking doesn't work if you back up. 57 DCHECK(!rewind_); 58 return preceded_by_newline_; 59 } 60 61 #if DEBUG 62 // Debug only method to go from a token back to its name. 63 // Slow, only use for debugging. 64 std::string Name(token_t token) const; 65 #endif 66 67 // Restores old position (token after that position). Note that it is not 68 // allowed to rewind right after a seek, because previous tokens are unknown. 69 void Seek(size_t pos); 70 71 // Select whether identifiers are resolved in global or local scope, 72 // and which scope new identifiers are added to. EnterLocalScope()73 void EnterLocalScope() { in_local_scope_ = true; } EnterGlobalScope()74 void EnterGlobalScope() { in_local_scope_ = false; } 75 // Drop all current local identifiers. 76 void ResetLocals(); 77 78 // Methods to check if a token is an identifier and which scope. IsLocal()79 bool IsLocal() const { return IsLocal(Token()); } IsGlobal()80 bool IsGlobal() const { return IsGlobal(Token()); } IsLocal(token_t token)81 static bool IsLocal(token_t token) { return token <= kLocalsStart; } IsGlobal(token_t token)82 static bool IsGlobal(token_t token) { return token >= kGlobalsStart; } 83 // Methods to find the index position of an identifier (count starting from 84 // 0 for each scope separately). LocalIndex(token_t token)85 static size_t LocalIndex(token_t token) { 86 DCHECK(IsLocal(token)); 87 return -(token - kLocalsStart); 88 } GlobalIndex(token_t token)89 static size_t GlobalIndex(token_t token) { 90 DCHECK(IsGlobal(token)); 91 return token - kGlobalsStart; 92 } 93 94 // Methods to check if the current token is a numeric literal considered an 95 // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note 96 // that numbers without a dot outside the [0 .. 2^32) range are errors. IsUnsigned()97 bool IsUnsigned() const { return Token() == kUnsigned; } AsUnsigned()98 uint32_t AsUnsigned() const { 99 DCHECK(IsUnsigned()); 100 return unsigned_value_; 101 } IsDouble()102 bool IsDouble() const { return Token() == kDouble; } AsDouble()103 double AsDouble() const { 104 DCHECK(IsDouble()); 105 return double_value_; 106 } 107 108 // clang-format off 109 enum { 110 // [-10000-kMaxIdentifierCount, -10000) :: Local identifiers (counting 111 // backwards) 112 // [-10000 .. -1) :: Builtin tokens like keywords 113 // (also includes some special 114 // ones like end of input) 115 // 0 .. 255 :: Single char tokens 116 // 256 .. 256+kMaxIdentifierCount :: Global identifiers 117 kLocalsStart = -10000, 118 #define V(name, _junk1, _junk2, _junk3) kToken_##name, 119 STDLIB_MATH_FUNCTION_LIST(V) 120 STDLIB_ARRAY_TYPE_LIST(V) 121 #undef V 122 #define V(name, _junk1) kToken_##name, 123 STDLIB_MATH_VALUE_LIST(V) 124 #undef V 125 #define V(name) kToken_##name, 126 STDLIB_OTHER_LIST(V) 127 KEYWORD_NAME_LIST(V) 128 #undef V 129 #define V(rawname, name) kToken_##name, 130 LONG_SYMBOL_NAME_LIST(V) 131 #undef V 132 #define V(name, value, string_name) name = value, 133 SPECIAL_TOKEN_LIST(V) 134 #undef V 135 kGlobalsStart = 256, 136 }; 137 // clang-format on 138 139 static constexpr base::uc32 kEndOfInputU = 140 static_cast<base::uc32>(kEndOfInput); 141 142 private: 143 Utf16CharacterStream* stream_; 144 token_t token_; 145 token_t preceding_token_; 146 token_t next_token_; // Only set when in {rewind} state. 147 size_t position_; // Corresponds to {token} position. 148 size_t preceding_position_; // Corresponds to {preceding_token} position. 149 size_t next_position_; // Only set when in {rewind} state. 150 bool rewind_; 151 std::string identifier_string_; 152 bool in_local_scope_; 153 std::unordered_map<std::string, token_t> local_names_; 154 std::unordered_map<std::string, token_t> global_names_; 155 std::unordered_map<std::string, token_t> property_names_; 156 int global_count_; 157 double double_value_; 158 uint32_t unsigned_value_; 159 bool preceded_by_newline_; 160 161 // Consume multiple characters. 162 void ConsumeIdentifier(base::uc32 ch); 163 void ConsumeNumber(base::uc32 ch); 164 bool ConsumeCComment(); 165 void ConsumeCPPComment(); 166 void ConsumeString(base::uc32 quote); 167 void ConsumeCompareOrShift(base::uc32 ch); 168 169 // Classify character categories. 170 bool IsIdentifierStart(base::uc32 ch); 171 bool IsIdentifierPart(base::uc32 ch); 172 bool IsNumberStart(base::uc32 ch); 173 }; 174 175 } // namespace internal 176 } // namespace v8 177 178 #endif // V8_ASMJS_ASM_SCANNER_H_ 179