1 // Copyright 2017 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_ASMJS_ASM_PARSER_H_ 6 #define V8_ASMJS_ASM_PARSER_H_ 7 8 #include <memory> 9 #include <string> 10 11 #include "src/asmjs/asm-scanner.h" 12 #include "src/asmjs/asm-types.h" 13 #include "src/base/enum-set.h" 14 #include "src/utils/vector.h" 15 #include "src/wasm/wasm-module-builder.h" 16 #include "src/zone/zone-containers.h" 17 18 namespace v8 { 19 namespace internal { 20 21 class Utf16CharacterStream; 22 23 namespace wasm { 24 25 // A custom parser + validator + wasm converter for asm.js: 26 // http://asmjs.org/spec/latest/ 27 // This parser intentionally avoids the portion of JavaScript parsing 28 // that are not required to determine if code is valid asm.js code. 29 // * It is mostly one pass. 30 // * It bails out on unexpected input. 31 // * It assumes strict ordering insofar as permitted by asm.js validation rules. 32 // * It relies on a custom scanner that provides de-duped identifiers in two 33 // scopes (local + module wide). 34 class AsmJsParser { 35 public: 36 // clang-format off 37 enum StandardMember { 38 kInfinity, 39 kNaN, 40 #define V(_unused1, name, _unused2, _unused3) kMath##name, 41 STDLIB_MATH_FUNCTION_LIST(V) 42 #undef V 43 #define V(name, _unused1) kMath##name, 44 STDLIB_MATH_VALUE_LIST(V) 45 #undef V 46 #define V(name, _unused1, _unused2, _unused3) k##name, 47 STDLIB_ARRAY_TYPE_LIST(V) 48 #undef V 49 }; 50 // clang-format on 51 52 using StdlibSet = base::EnumSet<StandardMember, uint64_t>; 53 54 explicit AsmJsParser(Zone* zone, uintptr_t stack_limit, 55 Utf16CharacterStream* stream); 56 bool Run(); failure_message()57 const char* failure_message() const { return failure_message_; } failure_location()58 int failure_location() const { return failure_location_; } module_builder()59 WasmModuleBuilder* module_builder() { return module_builder_; } stdlib_uses()60 const StdlibSet* stdlib_uses() const { return &stdlib_uses_; } 61 62 private: 63 // clang-format off 64 enum class VarKind { 65 kUnused, 66 kLocal, 67 kGlobal, 68 kSpecial, 69 kFunction, 70 kTable, 71 kImportedFunction, 72 #define V(_unused0, Name, _unused1, _unused2) kMath##Name, 73 STDLIB_MATH_FUNCTION_LIST(V) 74 #undef V 75 #define V(Name, _unused1) kMath##Name, 76 STDLIB_MATH_VALUE_LIST(V) 77 #undef V 78 }; 79 // clang-format on 80 81 // A single import in asm.js can require multiple imports in wasm, if the 82 // function is used with different signatures. {cache} keeps the wasm 83 // imports for the single asm.js import of name {function_name}. 84 struct FunctionImportInfo { 85 Vector<const char> function_name; 86 ZoneUnorderedMap<FunctionSig, uint32_t> cache; 87 88 // Constructor. FunctionImportInfoFunctionImportInfo89 FunctionImportInfo(Vector<const char> name, Zone* zone) 90 : function_name(name), cache(zone) {} 91 }; 92 93 struct VarInfo { 94 AsmType* type = AsmType::None(); 95 WasmFunctionBuilder* function_builder = nullptr; 96 FunctionImportInfo* import = nullptr; 97 uint32_t mask = 0; 98 uint32_t index = 0; 99 VarKind kind = VarKind::kUnused; 100 bool mutable_variable = true; 101 bool function_defined = false; 102 }; 103 104 struct GlobalImport { 105 Vector<const char> import_name; 106 ValueType value_type; 107 VarInfo* var_info; 108 }; 109 110 // Distinguish different kinds of blocks participating in {block_stack}. Each 111 // entry on that stack represents one block in the wasm code, and determines 112 // which block 'break' and 'continue' target in the current context: 113 // - kRegular: The target of a 'break' (with & without identifier). 114 // Pushed by an IterationStatement and a SwitchStatement. 115 // - kLoop : The target of a 'continue' (with & without identifier). 116 // Pushed by an IterationStatement. 117 // - kNamed : The target of a 'break' with a specific identifier. 118 // Pushed by a BlockStatement. 119 // - kOther : Only used for internal blocks, can never be targeted. 120 enum class BlockKind { kRegular, kLoop, kNamed, kOther }; 121 122 // One entry in the {block_stack}, see {BlockKind} above for details. Blocks 123 // without a label have {kTokenNone} set as their label. 124 struct BlockInfo { 125 BlockKind kind; 126 AsmJsScanner::token_t label; 127 }; 128 129 // Helper class to make {TempVariable} safe for nesting. 130 class TemporaryVariableScope; 131 132 template <typename T> 133 class CachedVectors { 134 public: CachedVectors(Zone * zone)135 explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {} 136 zone()137 Zone* zone() const { return reusable_vectors_.get_allocator().zone(); } 138 fill(ZoneVector<T> * vec)139 inline void fill(ZoneVector<T>* vec) { 140 if (reusable_vectors_.empty()) return; 141 reusable_vectors_.back().swap(*vec); 142 reusable_vectors_.pop_back(); 143 vec->clear(); 144 } 145 reuse(ZoneVector<T> * vec)146 inline void reuse(ZoneVector<T>* vec) { 147 reusable_vectors_.emplace_back(std::move(*vec)); 148 } 149 150 private: 151 ZoneVector<ZoneVector<T>> reusable_vectors_; 152 }; 153 154 template <typename T> 155 class CachedVector final : public ZoneVector<T> { 156 public: CachedVector(CachedVectors<T> * cache)157 explicit CachedVector(CachedVectors<T>* cache) 158 : ZoneVector<T>(cache->zone()), cache_(cache) { 159 cache->fill(this); 160 } ~CachedVector()161 ~CachedVector() { cache_->reuse(this); } 162 163 private: 164 CachedVectors<T>* cache_; 165 }; 166 167 Zone* zone_; 168 AsmJsScanner scanner_; 169 WasmModuleBuilder* module_builder_; 170 WasmFunctionBuilder* current_function_builder_; 171 AsmType* return_type_ = nullptr; 172 uintptr_t stack_limit_; 173 StdlibSet stdlib_uses_; 174 Vector<VarInfo> global_var_info_; 175 Vector<VarInfo> local_var_info_; 176 size_t num_globals_ = 0; 177 178 CachedVectors<ValueType> cached_valuetype_vectors_{zone_}; 179 CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_}; 180 CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_}; 181 CachedVectors<int32_t> cached_int_vectors_{zone_}; 182 183 int function_temp_locals_offset_; 184 int function_temp_locals_used_; 185 int function_temp_locals_depth_; 186 187 // Error Handling related 188 bool failed_ = false; 189 const char* failure_message_; 190 int failure_location_ = kNoSourcePosition; 191 192 // Module Related. 193 AsmJsScanner::token_t stdlib_name_ = kTokenNone; 194 AsmJsScanner::token_t foreign_name_ = kTokenNone; 195 AsmJsScanner::token_t heap_name_ = kTokenNone; 196 197 static const AsmJsScanner::token_t kTokenNone = 0; 198 199 // Track if parsing a heap assignment. 200 bool inside_heap_assignment_ = false; 201 AsmType* heap_access_type_ = nullptr; 202 203 ZoneVector<BlockInfo> block_stack_; 204 205 // Types used for stdlib function and their set up. 206 AsmType* stdlib_dq2d_; 207 AsmType* stdlib_dqdq2d_; 208 AsmType* stdlib_i2s_; 209 AsmType* stdlib_ii2s_; 210 AsmType* stdlib_minmax_; 211 AsmType* stdlib_abs_; 212 AsmType* stdlib_ceil_like_; 213 AsmType* stdlib_fround_; 214 215 // When making calls, the return type is needed to lookup signatures. 216 // For `+callsite(..)` or `fround(callsite(..))` use this value to pass 217 // along the coercion. 218 AsmType* call_coercion_ = nullptr; 219 220 // The source position associated with the above {call_coercion}. 221 size_t call_coercion_position_; 222 223 // When making calls, the coercion can also appear in the source stream 224 // syntactically "behind" the call site. For `callsite(..)|0` use this 225 // value to flag that such a coercion must happen. 226 AsmType* call_coercion_deferred_ = nullptr; 227 228 // The source position at which requesting a deferred coercion via the 229 // aforementioned {call_coercion_deferred} is allowed. 230 size_t call_coercion_deferred_position_; 231 232 // The code position of the last heap access shift by an immediate value. 233 // For `heap[expr >> value:NumericLiteral]` this indicates from where to 234 // delete code when the expression is used as part of a valid heap access. 235 // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched. 236 size_t heap_access_shift_position_; 237 uint32_t heap_access_shift_value_; 238 static const size_t kNoHeapAccessShift = -1; 239 240 // Used to track the last label we've seen so it can be matched to later 241 // statements it's attached to. 242 AsmJsScanner::token_t pending_label_ = kTokenNone; 243 244 // Global imports. The list of imported variables that are copied during 245 // module instantiation into a corresponding global variable. 246 ZoneLinkedList<GlobalImport> global_imports_; 247 zone()248 Zone* zone() { return zone_; } 249 Peek(AsmJsScanner::token_t token)250 inline bool Peek(AsmJsScanner::token_t token) { 251 return scanner_.Token() == token; 252 } 253 PeekForZero()254 inline bool PeekForZero() { 255 return (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0); 256 } 257 Check(AsmJsScanner::token_t token)258 inline bool Check(AsmJsScanner::token_t token) { 259 if (scanner_.Token() == token) { 260 scanner_.Next(); 261 return true; 262 } else { 263 return false; 264 } 265 } 266 CheckForZero()267 inline bool CheckForZero() { 268 if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) { 269 scanner_.Next(); 270 return true; 271 } else { 272 return false; 273 } 274 } 275 CheckForDouble(double * value)276 inline bool CheckForDouble(double* value) { 277 if (scanner_.IsDouble()) { 278 *value = scanner_.AsDouble(); 279 scanner_.Next(); 280 return true; 281 } else { 282 return false; 283 } 284 } 285 CheckForUnsigned(uint32_t * value)286 inline bool CheckForUnsigned(uint32_t* value) { 287 if (scanner_.IsUnsigned()) { 288 *value = scanner_.AsUnsigned(); 289 scanner_.Next(); 290 return true; 291 } else { 292 return false; 293 } 294 } 295 CheckForUnsignedBelow(uint32_t limit,uint32_t * value)296 inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) { 297 if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) { 298 *value = scanner_.AsUnsigned(); 299 scanner_.Next(); 300 return true; 301 } else { 302 return false; 303 } 304 } 305 Consume()306 inline AsmJsScanner::token_t Consume() { 307 AsmJsScanner::token_t ret = scanner_.Token(); 308 scanner_.Next(); 309 return ret; 310 } 311 312 void SkipSemicolon(); 313 314 VarInfo* GetVarInfo(AsmJsScanner::token_t token); 315 uint32_t VarIndex(VarInfo* info); 316 void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type, 317 ValueType vtype, WasmInitExpr init = WasmInitExpr()); 318 void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type); 319 void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype, 320 bool mutable_variable, VarInfo* info); 321 322 // Allocates a temporary local variable. The given {index} is absolute within 323 // the function body, consider using {TemporaryVariableScope} when nesting. 324 uint32_t TempVariable(int index); 325 326 // Preserves a copy of the scanner's current identifier string in the zone. 327 Vector<const char> CopyCurrentIdentifierString(); 328 329 // Use to set up block stack layers (including synthetic ones for if-else). 330 // Begin/Loop/End below are implemented with these plus code generation. 331 void BareBegin(BlockKind kind, AsmJsScanner::token_t label = 0); 332 void BareEnd(); 333 int FindContinueLabelDepth(AsmJsScanner::token_t label); 334 int FindBreakLabelDepth(AsmJsScanner::token_t label); 335 336 // Use to set up actual wasm blocks/loops. 337 void Begin(AsmJsScanner::token_t label = 0); 338 void Loop(AsmJsScanner::token_t label = 0); 339 void End(); 340 341 void InitializeStdlibTypes(); 342 343 FunctionSig* ConvertSignature(AsmType* return_type, 344 const ZoneVector<AsmType*>& params); 345 346 void ValidateModule(); // 6.1 ValidateModule 347 void ValidateModuleParameters(); // 6.1 ValidateModule - parameters 348 void ValidateModuleVars(); // 6.1 ValidateModule - variables 349 void ValidateModuleVar(bool mutable_variable); 350 void ValidateModuleVarImport(VarInfo* info, bool mutable_variable); 351 void ValidateModuleVarStdlib(VarInfo* info); 352 void ValidateModuleVarNewStdlib(VarInfo* info); 353 void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable); 354 355 void ValidateExport(); // 6.2 ValidateExport 356 void ValidateFunctionTable(); // 6.3 ValidateFunctionTable 357 void ValidateFunction(); // 6.4 ValidateFunction 358 void ValidateFunctionParams(ZoneVector<AsmType*>* params); 359 void ValidateFunctionLocals(size_t param_count, 360 ZoneVector<ValueType>* locals); 361 void ValidateStatement(); // 6.5 ValidateStatement 362 void Block(); // 6.5.1 Block 363 void ExpressionStatement(); // 6.5.2 ExpressionStatement 364 void EmptyStatement(); // 6.5.3 EmptyStatement 365 void IfStatement(); // 6.5.4 IfStatement 366 void ReturnStatement(); // 6.5.5 ReturnStatement 367 bool IterationStatement(); // 6.5.6 IterationStatement 368 void WhileStatement(); // 6.5.6 IterationStatement - while 369 void DoStatement(); // 6.5.6 IterationStatement - do 370 void ForStatement(); // 6.5.6 IterationStatement - for 371 void BreakStatement(); // 6.5.7 BreakStatement 372 void ContinueStatement(); // 6.5.8 ContinueStatement 373 void LabelledStatement(); // 6.5.9 LabelledStatement 374 void SwitchStatement(); // 6.5.10 SwitchStatement 375 void ValidateCase(); // 6.6. ValidateCase 376 void ValidateDefault(); // 6.7 ValidateDefault 377 AsmType* ValidateExpression(); // 6.8 ValidateExpression 378 AsmType* Expression(AsmType* expect); // 6.8.1 Expression 379 AsmType* NumericLiteral(); // 6.8.2 NumericLiteral 380 AsmType* Identifier(); // 6.8.3 Identifier 381 AsmType* CallExpression(); // 6.8.4 CallExpression 382 AsmType* MemberExpression(); // 6.8.5 MemberExpression 383 AsmType* AssignmentExpression(); // 6.8.6 AssignmentExpression 384 AsmType* UnaryExpression(); // 6.8.7 UnaryExpression 385 AsmType* MultiplicativeExpression(); // 6.8.8 MultiplicativeExpression 386 AsmType* AdditiveExpression(); // 6.8.9 AdditiveExpression 387 AsmType* ShiftExpression(); // 6.8.10 ShiftExpression 388 AsmType* RelationalExpression(); // 6.8.11 RelationalExpression 389 AsmType* EqualityExpression(); // 6.8.12 EqualityExpression 390 AsmType* BitwiseANDExpression(); // 6.8.13 BitwiseANDExpression 391 AsmType* BitwiseXORExpression(); // 6.8.14 BitwiseXORExpression 392 AsmType* BitwiseORExpression(); // 6.8.15 BitwiseORExpression 393 AsmType* ConditionalExpression(); // 6.8.16 ConditionalExpression 394 AsmType* ParenthesizedExpression(); // 6.8.17 ParenthesiedExpression 395 AsmType* ValidateCall(); // 6.9 ValidateCall 396 bool PeekCall(); // 6.9 ValidateCall - helper 397 void ValidateHeapAccess(); // 6.10 ValidateHeapAccess 398 void ValidateFloatCoercion(); // 6.11 ValidateFloatCoercion 399 400 // Used as part of {ForStatement}. Scans forward to the next `)` in order to 401 // skip over the third expression in a for-statement. This is one piece that 402 // makes this parser not be a pure single-pass. 403 void ScanToClosingParenthesis(); 404 405 // Used as part of {SwitchStatement}. Collects all case labels in the current 406 // switch-statement, then resets the scanner position. This is one piece that 407 // makes this parser not be a pure single-pass. 408 void GatherCases(ZoneVector<int32_t>* cases); 409 }; 410 411 } // namespace wasm 412 } // namespace internal 413 } // namespace v8 414 415 #endif // V8_ASMJS_ASM_PARSER_H_ 416