// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_ASMJS_ASM_PARSER_H_
#define V8_ASMJS_ASM_PARSER_H_

#include <memory>

#include "src/asmjs/asm-scanner.h"
#include "src/asmjs/asm-types.h"
#include "src/base/enum-set.h"
#include "src/base/vector.h"
#include "src/wasm/wasm-module-builder.h"
#include "src/zone/zone-containers.h"

namespace v8 {
namespace internal {

class Utf16CharacterStream;

namespace wasm {

// A custom parser + validator + wasm converter for asm.js:
// http://asmjs.org/spec/latest/
// This parser intentionally avoids the portions of JavaScript parsing
// that are not required to determine if code is valid asm.js code.
// * It is mostly one pass.
// * It bails out on unexpected input.
// * It assumes strict ordering insofar as permitted by asm.js validation rules.
// * It relies on a custom scanner that provides de-duped identifiers in two
//   scopes (local + module wide).
33 class AsmJsParser { 34 public: 35 // clang-format off 36 enum StandardMember { 37 kInfinity, 38 kNaN, 39 #define V(_unused1, name, _unused2, _unused3) kMath##name, 40 STDLIB_MATH_FUNCTION_LIST(V) 41 #undef V 42 #define V(name, _unused1) kMath##name, 43 STDLIB_MATH_VALUE_LIST(V) 44 #undef V 45 #define V(name, _unused1, _unused2, _unused3) k##name, 46 STDLIB_ARRAY_TYPE_LIST(V) 47 #undef V 48 }; 49 // clang-format on 50 51 using StdlibSet = base::EnumSet<StandardMember, uint64_t>; 52 53 explicit AsmJsParser(Zone* zone, uintptr_t stack_limit, 54 Utf16CharacterStream* stream); 55 bool Run(); failure_message()56 const char* failure_message() const { return failure_message_; } failure_location()57 int failure_location() const { return failure_location_; } module_builder()58 WasmModuleBuilder* module_builder() { return module_builder_; } stdlib_uses()59 const StdlibSet* stdlib_uses() const { return &stdlib_uses_; } 60 61 private: 62 // clang-format off 63 enum class VarKind { 64 kUnused, 65 kLocal, 66 kGlobal, 67 kSpecial, 68 kFunction, 69 kTable, 70 kImportedFunction, 71 #define V(_unused0, Name, _unused1, _unused2) kMath##Name, 72 STDLIB_MATH_FUNCTION_LIST(V) 73 #undef V 74 #define V(Name, _unused1) kMath##Name, 75 STDLIB_MATH_VALUE_LIST(V) 76 #undef V 77 }; 78 // clang-format on 79 80 // A single import in asm.js can require multiple imports in wasm, if the 81 // function is used with different signatures. {cache} keeps the wasm 82 // imports for the single asm.js import of name {function_name}. 83 struct FunctionImportInfo { 84 base::Vector<const char> function_name; 85 ZoneUnorderedMap<FunctionSig, uint32_t> cache; 86 87 // Constructor. 
FunctionImportInfoFunctionImportInfo88 FunctionImportInfo(base::Vector<const char> name, Zone* zone) 89 : function_name(name), cache(zone) {} 90 }; 91 92 struct VarInfo { 93 AsmType* type = AsmType::None(); 94 WasmFunctionBuilder* function_builder = nullptr; 95 FunctionImportInfo* import = nullptr; 96 uint32_t mask = 0; 97 uint32_t index = 0; 98 VarKind kind = VarKind::kUnused; 99 bool mutable_variable = true; 100 bool function_defined = false; 101 }; 102 103 struct GlobalImport { 104 base::Vector<const char> import_name; 105 ValueType value_type; 106 VarInfo* var_info; 107 }; 108 109 // Distinguish different kinds of blocks participating in {block_stack}. Each 110 // entry on that stack represents one block in the wasm code, and determines 111 // which block 'break' and 'continue' target in the current context: 112 // - kRegular: The target of a 'break' (with & without identifier). 113 // Pushed by an IterationStatement and a SwitchStatement. 114 // - kLoop : The target of a 'continue' (with & without identifier). 115 // Pushed by an IterationStatement. 116 // - kNamed : The target of a 'break' with a specific identifier. 117 // Pushed by a BlockStatement. 118 // - kOther : Only used for internal blocks, can never be targeted. 119 enum class BlockKind { kRegular, kLoop, kNamed, kOther }; 120 121 // One entry in the {block_stack}, see {BlockKind} above for details. Blocks 122 // without a label have {kTokenNone} set as their label. 123 struct BlockInfo { 124 BlockKind kind; 125 AsmJsScanner::token_t label; 126 }; 127 128 // Helper class to make {TempVariable} safe for nesting. 
129 class TemporaryVariableScope; 130 131 template <typename T> 132 class CachedVectors { 133 public: CachedVectors(Zone * zone)134 explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {} 135 zone()136 Zone* zone() const { return reusable_vectors_.get_allocator().zone(); } 137 fill(ZoneVector<T> * vec)138 inline void fill(ZoneVector<T>* vec) { 139 if (reusable_vectors_.empty()) return; 140 reusable_vectors_.back().swap(*vec); 141 reusable_vectors_.pop_back(); 142 vec->clear(); 143 } 144 reuse(ZoneVector<T> * vec)145 inline void reuse(ZoneVector<T>* vec) { 146 reusable_vectors_.emplace_back(std::move(*vec)); 147 } 148 149 private: 150 ZoneVector<ZoneVector<T>> reusable_vectors_; 151 }; 152 153 template <typename T> 154 class CachedVector final : public ZoneVector<T> { 155 public: CachedVector(CachedVectors<T> * cache)156 explicit CachedVector(CachedVectors<T>* cache) 157 : ZoneVector<T>(cache->zone()), cache_(cache) { 158 cache->fill(this); 159 } ~CachedVector()160 ~CachedVector() { cache_->reuse(this); } 161 162 private: 163 CachedVectors<T>* cache_; 164 }; 165 166 Zone* zone_; 167 AsmJsScanner scanner_; 168 WasmModuleBuilder* module_builder_; 169 WasmFunctionBuilder* current_function_builder_; 170 AsmType* return_type_ = nullptr; 171 uintptr_t stack_limit_; 172 StdlibSet stdlib_uses_; 173 base::Vector<VarInfo> global_var_info_; 174 base::Vector<VarInfo> local_var_info_; 175 size_t num_globals_ = 0; 176 177 CachedVectors<ValueType> cached_valuetype_vectors_{zone_}; 178 CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_}; 179 CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_}; 180 CachedVectors<int32_t> cached_int_vectors_{zone_}; 181 182 int function_temp_locals_offset_; 183 int function_temp_locals_used_; 184 int function_temp_locals_depth_; 185 186 // Error Handling related 187 bool failed_ = false; 188 const char* failure_message_; 189 int failure_location_ = kNoSourcePosition; 190 191 // Module Related. 
192 AsmJsScanner::token_t stdlib_name_ = kTokenNone; 193 AsmJsScanner::token_t foreign_name_ = kTokenNone; 194 AsmJsScanner::token_t heap_name_ = kTokenNone; 195 196 static const AsmJsScanner::token_t kTokenNone = 0; 197 198 // Track if parsing a heap assignment. 199 bool inside_heap_assignment_ = false; 200 AsmType* heap_access_type_ = nullptr; 201 202 ZoneVector<BlockInfo> block_stack_; 203 204 // Types used for stdlib function and their set up. 205 AsmType* stdlib_dq2d_; 206 AsmType* stdlib_dqdq2d_; 207 AsmType* stdlib_i2s_; 208 AsmType* stdlib_ii2s_; 209 AsmType* stdlib_minmax_; 210 AsmType* stdlib_abs_; 211 AsmType* stdlib_ceil_like_; 212 AsmType* stdlib_fround_; 213 214 // When making calls, the return type is needed to lookup signatures. 215 // For `+callsite(..)` or `fround(callsite(..))` use this value to pass 216 // along the coercion. 217 AsmType* call_coercion_ = nullptr; 218 219 // The source position associated with the above {call_coercion}. 220 size_t call_coercion_position_; 221 222 // When making calls, the coercion can also appear in the source stream 223 // syntactically "behind" the call site. For `callsite(..)|0` use this 224 // value to flag that such a coercion must happen. 225 AsmType* call_coercion_deferred_ = nullptr; 226 227 // The source position at which requesting a deferred coercion via the 228 // aforementioned {call_coercion_deferred} is allowed. 229 size_t call_coercion_deferred_position_; 230 231 // The code position of the last heap access shift by an immediate value. 232 // For `heap[expr >> value:NumericLiteral]` this indicates from where to 233 // delete code when the expression is used as part of a valid heap access. 234 // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched. 
235 size_t heap_access_shift_position_; 236 uint32_t heap_access_shift_value_; 237 static const size_t kNoHeapAccessShift = -1; 238 239 // Used to track the last label we've seen so it can be matched to later 240 // statements it's attached to. 241 AsmJsScanner::token_t pending_label_ = kTokenNone; 242 243 // Global imports. The list of imported variables that are copied during 244 // module instantiation into a corresponding global variable. 245 ZoneLinkedList<GlobalImport> global_imports_; 246 zone()247 Zone* zone() { return zone_; } 248 Peek(AsmJsScanner::token_t token)249 inline bool Peek(AsmJsScanner::token_t token) { 250 return scanner_.Token() == token; 251 } 252 PeekForZero()253 inline bool PeekForZero() { 254 return (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0); 255 } 256 Check(AsmJsScanner::token_t token)257 inline bool Check(AsmJsScanner::token_t token) { 258 if (scanner_.Token() == token) { 259 scanner_.Next(); 260 return true; 261 } else { 262 return false; 263 } 264 } 265 CheckForZero()266 inline bool CheckForZero() { 267 if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) { 268 scanner_.Next(); 269 return true; 270 } else { 271 return false; 272 } 273 } 274 CheckForDouble(double * value)275 inline bool CheckForDouble(double* value) { 276 if (scanner_.IsDouble()) { 277 *value = scanner_.AsDouble(); 278 scanner_.Next(); 279 return true; 280 } else { 281 return false; 282 } 283 } 284 CheckForUnsigned(uint32_t * value)285 inline bool CheckForUnsigned(uint32_t* value) { 286 if (scanner_.IsUnsigned()) { 287 *value = scanner_.AsUnsigned(); 288 scanner_.Next(); 289 return true; 290 } else { 291 return false; 292 } 293 } 294 CheckForUnsignedBelow(uint32_t limit,uint32_t * value)295 inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) { 296 if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) { 297 *value = scanner_.AsUnsigned(); 298 scanner_.Next(); 299 return true; 300 } else { 301 return false; 302 } 303 } 304 Consume()305 
inline AsmJsScanner::token_t Consume() { 306 AsmJsScanner::token_t ret = scanner_.Token(); 307 scanner_.Next(); 308 return ret; 309 } 310 311 void SkipSemicolon(); 312 313 VarInfo* GetVarInfo(AsmJsScanner::token_t token); 314 uint32_t VarIndex(VarInfo* info); 315 void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type, 316 ValueType vtype, WasmInitExpr init = WasmInitExpr()); 317 void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type); 318 void AddGlobalImport(base::Vector<const char> name, AsmType* type, 319 ValueType vtype, bool mutable_variable, VarInfo* info); 320 321 // Allocates a temporary local variable. The given {index} is absolute within 322 // the function body, consider using {TemporaryVariableScope} when nesting. 323 uint32_t TempVariable(int index); 324 325 // Preserves a copy of the scanner's current identifier string in the zone. 326 base::Vector<const char> CopyCurrentIdentifierString(); 327 328 // Use to set up block stack layers (including synthetic ones for if-else). 329 // Begin/Loop/End below are implemented with these plus code generation. 330 void BareBegin(BlockKind kind, AsmJsScanner::token_t label = 0); 331 void BareEnd(); 332 int FindContinueLabelDepth(AsmJsScanner::token_t label); 333 int FindBreakLabelDepth(AsmJsScanner::token_t label); 334 335 // Use to set up actual wasm blocks/loops. 
336 void Begin(AsmJsScanner::token_t label = 0); 337 void Loop(AsmJsScanner::token_t label = 0); 338 void End(); 339 340 void InitializeStdlibTypes(); 341 342 FunctionSig* ConvertSignature(AsmType* return_type, 343 const ZoneVector<AsmType*>& params); 344 345 void ValidateModule(); // 6.1 ValidateModule 346 void ValidateModuleParameters(); // 6.1 ValidateModule - parameters 347 void ValidateModuleVars(); // 6.1 ValidateModule - variables 348 void ValidateModuleVar(bool mutable_variable); 349 void ValidateModuleVarImport(VarInfo* info, bool mutable_variable); 350 void ValidateModuleVarStdlib(VarInfo* info); 351 void ValidateModuleVarNewStdlib(VarInfo* info); 352 void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable); 353 354 void ValidateExport(); // 6.2 ValidateExport 355 void ValidateFunctionTable(); // 6.3 ValidateFunctionTable 356 void ValidateFunction(); // 6.4 ValidateFunction 357 void ValidateFunctionParams(ZoneVector<AsmType*>* params); 358 void ValidateFunctionLocals(size_t param_count, 359 ZoneVector<ValueType>* locals); 360 void ValidateStatement(); // 6.5 ValidateStatement 361 void Block(); // 6.5.1 Block 362 void ExpressionStatement(); // 6.5.2 ExpressionStatement 363 void EmptyStatement(); // 6.5.3 EmptyStatement 364 void IfStatement(); // 6.5.4 IfStatement 365 void ReturnStatement(); // 6.5.5 ReturnStatement 366 bool IterationStatement(); // 6.5.6 IterationStatement 367 void WhileStatement(); // 6.5.6 IterationStatement - while 368 void DoStatement(); // 6.5.6 IterationStatement - do 369 void ForStatement(); // 6.5.6 IterationStatement - for 370 void BreakStatement(); // 6.5.7 BreakStatement 371 void ContinueStatement(); // 6.5.8 ContinueStatement 372 void LabelledStatement(); // 6.5.9 LabelledStatement 373 void SwitchStatement(); // 6.5.10 SwitchStatement 374 void ValidateCase(); // 6.6. 
ValidateCase 375 void ValidateDefault(); // 6.7 ValidateDefault 376 AsmType* ValidateExpression(); // 6.8 ValidateExpression 377 AsmType* Expression(AsmType* expect); // 6.8.1 Expression 378 AsmType* NumericLiteral(); // 6.8.2 NumericLiteral 379 AsmType* Identifier(); // 6.8.3 Identifier 380 AsmType* CallExpression(); // 6.8.4 CallExpression 381 AsmType* MemberExpression(); // 6.8.5 MemberExpression 382 AsmType* AssignmentExpression(); // 6.8.6 AssignmentExpression 383 AsmType* UnaryExpression(); // 6.8.7 UnaryExpression 384 AsmType* MultiplicativeExpression(); // 6.8.8 MultiplicativeExpression 385 AsmType* AdditiveExpression(); // 6.8.9 AdditiveExpression 386 AsmType* ShiftExpression(); // 6.8.10 ShiftExpression 387 AsmType* RelationalExpression(); // 6.8.11 RelationalExpression 388 AsmType* EqualityExpression(); // 6.8.12 EqualityExpression 389 AsmType* BitwiseANDExpression(); // 6.8.13 BitwiseANDExpression 390 AsmType* BitwiseXORExpression(); // 6.8.14 BitwiseXORExpression 391 AsmType* BitwiseORExpression(); // 6.8.15 BitwiseORExpression 392 AsmType* ConditionalExpression(); // 6.8.16 ConditionalExpression 393 AsmType* ParenthesizedExpression(); // 6.8.17 ParenthesiedExpression 394 AsmType* ValidateCall(); // 6.9 ValidateCall 395 bool PeekCall(); // 6.9 ValidateCall - helper 396 void ValidateHeapAccess(); // 6.10 ValidateHeapAccess 397 void ValidateFloatCoercion(); // 6.11 ValidateFloatCoercion 398 399 // Used as part of {ForStatement}. Scans forward to the next `)` in order to 400 // skip over the third expression in a for-statement. This is one piece that 401 // makes this parser not be a pure single-pass. 402 void ScanToClosingParenthesis(); 403 404 // Used as part of {SwitchStatement}. Collects all case labels in the current 405 // switch-statement, then resets the scanner position. This is one piece that 406 // makes this parser not be a pure single-pass. 
407 void GatherCases(ZoneVector<int32_t>* cases); 408 }; 409 410 } // namespace wasm 411 } // namespace internal 412 } // namespace v8 413 414 #endif // V8_ASMJS_ASM_PARSER_H_ 415