• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_ASMJS_ASM_PARSER_H_
6 #define V8_ASMJS_ASM_PARSER_H_
7 
8 #include <memory>
9 #include <string>
10 
11 #include "src/asmjs/asm-scanner.h"
12 #include "src/asmjs/asm-types.h"
13 #include "src/base/enum-set.h"
14 #include "src/utils/vector.h"
15 #include "src/wasm/wasm-module-builder.h"
16 #include "src/zone/zone-containers.h"
17 
18 namespace v8 {
19 namespace internal {
20 
21 class Utf16CharacterStream;
22 
23 namespace wasm {
24 
25 // A custom parser + validator + wasm converter for asm.js:
26 // http://asmjs.org/spec/latest/
27 // This parser intentionally avoids the portions of JavaScript parsing
28 // that are not required to determine if code is valid asm.js code.
29 // * It is mostly one pass.
30 // * It bails out on unexpected input.
31 // * It assumes strict ordering insofar as permitted by asm.js validation rules.
32 // * It relies on a custom scanner that provides de-duped identifiers in two
33 //   scopes (local + module wide).
34 class AsmJsParser {
35  public:
36   // clang-format off
     // Identifies one member of the asm.js standard library. {kInfinity} and
     // {kNaN} are spelled out; every other enumerator is generated by expanding
     // one of the STDLIB_* X-macro lists (defined outside this header) with a
     // local helper macro V. The enumerators get consecutive values in
     // declaration order, and the enum is the element type of {StdlibSet}.
37   enum StandardMember {
38     kInfinity,
39     kNaN,
       // One kMath<name> enumerator per entry of the math function list; only
       // the second macro argument is used.
40 #define V(_unused1, name, _unused2, _unused3) kMath##name,
41     STDLIB_MATH_FUNCTION_LIST(V)
42 #undef V
       // One kMath<name> enumerator per entry of the math value list; only the
       // first macro argument is used.
43 #define V(name, _unused1) kMath##name,
44     STDLIB_MATH_VALUE_LIST(V)
45 #undef V
       // One k<name> enumerator per entry of the array type list; only the
       // first macro argument is used.
46 #define V(name, _unused1, _unused2, _unused3) k##name,
47     STDLIB_ARRAY_TYPE_LIST(V)
48 #undef V
49   };
50   // clang-format on
51 
52   using StdlibSet = base::EnumSet<StandardMember, uint64_t>;
53 
54   explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
55                        Utf16CharacterStream* stream);
56   bool Run();
failure_message()57   const char* failure_message() const { return failure_message_; }
failure_location()58   int failure_location() const { return failure_location_; }
module_builder()59   WasmModuleBuilder* module_builder() { return module_builder_; }
stdlib_uses()60   const StdlibSet* stdlib_uses() const { return &stdlib_uses_; }
61 
62  private:
63   // clang-format off
     // Classifies what a {VarInfo} entry stands for. {kUnused} is the default
     // kind of a freshly constructed VarInfo. After the hand-written kinds, one
     // kMath<Name> enumerator is generated per entry of the stdlib math
     // function list and of the stdlib math value list (both X-macro lists are
     // defined outside this header).
64   enum class VarKind {
65     kUnused,
66     kLocal,
67     kGlobal,
68     kSpecial,
69     kFunction,
70     kTable,
71     kImportedFunction,
       // Math functions: only the second macro argument is used.
72 #define V(_unused0, Name, _unused1, _unused2) kMath##Name,
73     STDLIB_MATH_FUNCTION_LIST(V)
74 #undef V
       // Math values: only the first macro argument is used.
75 #define V(Name, _unused1) kMath##Name,
76     STDLIB_MATH_VALUE_LIST(V)
77 #undef V
78   };
79   // clang-format on
80 
81   // A single import in asm.js can require multiple imports in wasm, if the
82   // function is used with different signatures. {cache} keeps the wasm
83   // imports for the single asm.js import of name {function_name}.
84   struct FunctionImportInfo {
85     Vector<const char> function_name;
86     ZoneUnorderedMap<FunctionSig, uint32_t> cache;
87 
88     // Constructor.
FunctionImportInfoFunctionImportInfo89     FunctionImportInfo(Vector<const char> name, Zone* zone)
90         : function_name(name), cache(zone) {}
91   };
92 
93   struct VarInfo {
       // Record kept per variable. Every field has a default, so a
       // default-constructed VarInfo is of kind {kUnused}, has type
       // AsmType::None() and is flagged mutable.
94     AsmType* type = AsmType::None();
       // NOTE(review): {function_builder} and {import} are presumably only
       // meaningful for function / imported-function entries -- confirm
       // against the users in the implementation file.
95     WasmFunctionBuilder* function_builder = nullptr;
96     FunctionImportInfo* import = nullptr;
97     uint32_t mask = 0;
98     uint32_t index = 0;
99     VarKind kind = VarKind::kUnused;
100     bool mutable_variable = true;
101     bool function_defined = false;
102   };
103 
104   struct GlobalImport {
        // Element type of the {global_imports_} list. None of the fields has a
        // default initializer, so all three must be supplied when an entry is
        // created.
105     Vector<const char> import_name;
106     ValueType value_type;
107     VarInfo* var_info;
108   };
109 
// Classifies the entries of {block_stack}. Every entry on that stack stands
// for one block in the wasm code; its kind decides which block a 'break' or
// 'continue' in the current context may target.
enum class BlockKind {
  // Target of a 'break' (with & without identifier). Pushed by an
  // IterationStatement and a SwitchStatement.
  kRegular,
  // Target of a 'continue' (with & without identifier). Pushed by an
  // IterationStatement.
  kLoop,
  // Target of a 'break' with a specific identifier. Pushed by a
  // BlockStatement.
  kNamed,
  // Only used for internal blocks, can never be targeted.
  kOther,
};
121 
122   // One entry in the {block_stack}, see {BlockKind} above for details. Blocks
123   // without a label have {kTokenNone} set as their label.
      // Neither field has a default initializer, so both have to be supplied
      // whenever an entry is created.
124   struct BlockInfo {
125     BlockKind kind;
126     AsmJsScanner::token_t label;
127   };
128 
129   // Helper class to make {TempVariable} safe for nesting.
130   class TemporaryVariableScope;
131 
132   template <typename T>
133   class CachedVectors {
134    public:
CachedVectors(Zone * zone)135     explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {}
136 
zone()137     Zone* zone() const { return reusable_vectors_.get_allocator().zone(); }
138 
fill(ZoneVector<T> * vec)139     inline void fill(ZoneVector<T>* vec) {
140       if (reusable_vectors_.empty()) return;
141       reusable_vectors_.back().swap(*vec);
142       reusable_vectors_.pop_back();
143       vec->clear();
144     }
145 
reuse(ZoneVector<T> * vec)146     inline void reuse(ZoneVector<T>* vec) {
147       reusable_vectors_.emplace_back(std::move(*vec));
148     }
149 
150    private:
151     ZoneVector<ZoneVector<T>> reusable_vectors_;
152   };
153 
154   template <typename T>
155   class CachedVector final : public ZoneVector<T> {
156    public:
CachedVector(CachedVectors<T> * cache)157     explicit CachedVector(CachedVectors<T>* cache)
158         : ZoneVector<T>(cache->zone()), cache_(cache) {
159       cache->fill(this);
160     }
~CachedVector()161     ~CachedVector() { cache_->reuse(this); }
162 
163    private:
164     CachedVectors<T>* cache_;
165   };
166 
167   Zone* zone_;
168   AsmJsScanner scanner_;
169   WasmModuleBuilder* module_builder_;
170   WasmFunctionBuilder* current_function_builder_;
171   AsmType* return_type_ = nullptr;
172   uintptr_t stack_limit_;
173   StdlibSet stdlib_uses_;
174   Vector<VarInfo> global_var_info_;
175   Vector<VarInfo> local_var_info_;
176   size_t num_globals_ = 0;
177 
178   CachedVectors<ValueType> cached_valuetype_vectors_{zone_};
179   CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_};
180   CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_};
181   CachedVectors<int32_t> cached_int_vectors_{zone_};
182 
183   int function_temp_locals_offset_;
184   int function_temp_locals_used_;
185   int function_temp_locals_depth_;
186 
187   // Error Handling related
188   bool failed_ = false;
189   const char* failure_message_;
190   int failure_location_ = kNoSourcePosition;
191 
192   // Module Related.
193   AsmJsScanner::token_t stdlib_name_ = kTokenNone;
194   AsmJsScanner::token_t foreign_name_ = kTokenNone;
195   AsmJsScanner::token_t heap_name_ = kTokenNone;
196 
197   static const AsmJsScanner::token_t kTokenNone = 0;
198 
199   // Track if parsing a heap assignment.
200   bool inside_heap_assignment_ = false;
201   AsmType* heap_access_type_ = nullptr;
202 
203   ZoneVector<BlockInfo> block_stack_;
204 
205   // Types used for stdlib function and their set up.
206   AsmType* stdlib_dq2d_;
207   AsmType* stdlib_dqdq2d_;
208   AsmType* stdlib_i2s_;
209   AsmType* stdlib_ii2s_;
210   AsmType* stdlib_minmax_;
211   AsmType* stdlib_abs_;
212   AsmType* stdlib_ceil_like_;
213   AsmType* stdlib_fround_;
214 
215   // When making calls, the return type is needed to lookup signatures.
216   // For `+callsite(..)` or `fround(callsite(..))` use this value to pass
217   // along the coercion.
218   AsmType* call_coercion_ = nullptr;
219 
220   // The source position associated with the above {call_coercion}.
221   size_t call_coercion_position_;
222 
223   // When making calls, the coercion can also appear in the source stream
224   // syntactically "behind" the call site. For `callsite(..)|0` use this
225   // value to flag that such a coercion must happen.
226   AsmType* call_coercion_deferred_ = nullptr;
227 
228   // The source position at which requesting a deferred coercion via the
229   // aforementioned {call_coercion_deferred} is allowed.
230   size_t call_coercion_deferred_position_;
231 
232   // The code position of the last heap access shift by an immediate value.
233   // For `heap[expr >> value:NumericLiteral]` this indicates from where to
234   // delete code when the expression is used as part of a valid heap access.
235   // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched.
236   size_t heap_access_shift_position_;
237   uint32_t heap_access_shift_value_;
238   static const size_t kNoHeapAccessShift = -1;
239 
240   // Used to track the last label we've seen so it can be matched to later
241   // statements it's attached to.
242   AsmJsScanner::token_t pending_label_ = kTokenNone;
243 
244   // Global imports. The list of imported variables that are copied during
245   // module instantiation into a corresponding global variable.
246   ZoneLinkedList<GlobalImport> global_imports_;
247 
zone()248   Zone* zone() { return zone_; }
249 
Peek(AsmJsScanner::token_t token)250   inline bool Peek(AsmJsScanner::token_t token) {
251     return scanner_.Token() == token;
252   }
253 
PeekForZero()254   inline bool PeekForZero() {
255     return (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0);
256   }
257 
Check(AsmJsScanner::token_t token)258   inline bool Check(AsmJsScanner::token_t token) {
259     if (scanner_.Token() == token) {
260       scanner_.Next();
261       return true;
262     } else {
263       return false;
264     }
265   }
266 
CheckForZero()267   inline bool CheckForZero() {
268     if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) {
269       scanner_.Next();
270       return true;
271     } else {
272       return false;
273     }
274   }
275 
CheckForDouble(double * value)276   inline bool CheckForDouble(double* value) {
277     if (scanner_.IsDouble()) {
278       *value = scanner_.AsDouble();
279       scanner_.Next();
280       return true;
281     } else {
282       return false;
283     }
284   }
285 
CheckForUnsigned(uint32_t * value)286   inline bool CheckForUnsigned(uint32_t* value) {
287     if (scanner_.IsUnsigned()) {
288       *value = scanner_.AsUnsigned();
289       scanner_.Next();
290       return true;
291     } else {
292       return false;
293     }
294   }
295 
CheckForUnsignedBelow(uint32_t limit,uint32_t * value)296   inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) {
297     if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) {
298       *value = scanner_.AsUnsigned();
299       scanner_.Next();
300       return true;
301     } else {
302       return false;
303     }
304   }
305 
Consume()306   inline AsmJsScanner::token_t Consume() {
307     AsmJsScanner::token_t ret = scanner_.Token();
308     scanner_.Next();
309     return ret;
310   }
311 
312   void SkipSemicolon();
313 
      // Variable and global bookkeeping helpers. Only the declarations are
      // visible in this header.
      // NOTE(review): presumably GetVarInfo() maps a scanner token to its
      // VarInfo record and VarIndex() yields the index to use for that
      // variable -- confirm against the definitions.
314   VarInfo* GetVarInfo(AsmJsScanner::token_t token);
315   uint32_t VarIndex(VarInfo* info);
      // {init} defaults to a default-constructed WasmInitExpr.
316   void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type,
317                      ValueType vtype, WasmInitExpr init = WasmInitExpr());
318   void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
319   void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype,
320                        bool mutable_variable, VarInfo* info);
321 
322   // Allocates a temporary local variable. The given {index} is absolute within
323   // the function body, consider using {TemporaryVariableScope} when nesting.
324   uint32_t TempVariable(int index);
325 
326   // Preserves a copy of the scanner's current identifier string in the zone.
327   Vector<const char> CopyCurrentIdentifierString();
328 
329   // Use to set up block stack layers (including synthetic ones for if-else).
330   // Begin/Loop/End below are implemented with these plus code generation.
331   void BareBegin(BlockKind kind, AsmJsScanner::token_t label = 0);
332   void BareEnd();
333   int FindContinueLabelDepth(AsmJsScanner::token_t label);
334   int FindBreakLabelDepth(AsmJsScanner::token_t label);
335 
336   // Use to set up actual wasm blocks/loops.
337   void Begin(AsmJsScanner::token_t label = 0);
338   void Loop(AsmJsScanner::token_t label = 0);
339   void End();
340 
341   void InitializeStdlibTypes();
342 
343   FunctionSig* ConvertSignature(AsmType* return_type,
344                                 const ZoneVector<AsmType*>& params);
345 
      // Validation routines, declared here and defined out of line. The
      // trailing comment on each declaration names a numbered section.
      // NOTE(review): the numbers presumably refer to the asm.js specification
      // linked in the class comment (http://asmjs.org/spec/latest/) -- confirm.
      // The routines typed AsmType* return a type pointer; the remaining ones
      // return void or bool as declared.
346   void ValidateModule();            // 6.1 ValidateModule
347   void ValidateModuleParameters();  // 6.1 ValidateModule - parameters
348   void ValidateModuleVars();        // 6.1 ValidateModule - variables
349   void ValidateModuleVar(bool mutable_variable);
350   void ValidateModuleVarImport(VarInfo* info, bool mutable_variable);
351   void ValidateModuleVarStdlib(VarInfo* info);
352   void ValidateModuleVarNewStdlib(VarInfo* info);
353   void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable);
354 
355   void ValidateExport();         // 6.2 ValidateExport
356   void ValidateFunctionTable();  // 6.3 ValidateFunctionTable
357   void ValidateFunction();       // 6.4 ValidateFunction
358   void ValidateFunctionParams(ZoneVector<AsmType*>* params);
359   void ValidateFunctionLocals(size_t param_count,
360                               ZoneVector<ValueType>* locals);
361   void ValidateStatement();              // 6.5 ValidateStatement
362   void Block();                          // 6.5.1 Block
363   void ExpressionStatement();            // 6.5.2 ExpressionStatement
364   void EmptyStatement();                 // 6.5.3 EmptyStatement
365   void IfStatement();                    // 6.5.4 IfStatement
366   void ReturnStatement();                // 6.5.5 ReturnStatement
367   bool IterationStatement();             // 6.5.6 IterationStatement
368   void WhileStatement();                 // 6.5.6 IterationStatement - while
369   void DoStatement();                    // 6.5.6 IterationStatement - do
370   void ForStatement();                   // 6.5.6 IterationStatement - for
371   void BreakStatement();                 // 6.5.7 BreakStatement
372   void ContinueStatement();              // 6.5.8 ContinueStatement
373   void LabelledStatement();              // 6.5.9 LabelledStatement
374   void SwitchStatement();                // 6.5.10 SwitchStatement
375   void ValidateCase();                   // 6.6. ValidateCase
376   void ValidateDefault();                // 6.7 ValidateDefault
377   AsmType* ValidateExpression();         // 6.8 ValidateExpression
378   AsmType* Expression(AsmType* expect);  // 6.8.1 Expression
379   AsmType* NumericLiteral();             // 6.8.2 NumericLiteral
380   AsmType* Identifier();                 // 6.8.3 Identifier
381   AsmType* CallExpression();             // 6.8.4 CallExpression
382   AsmType* MemberExpression();           // 6.8.5 MemberExpression
383   AsmType* AssignmentExpression();       // 6.8.6 AssignmentExpression
384   AsmType* UnaryExpression();            // 6.8.7 UnaryExpression
385   AsmType* MultiplicativeExpression();   // 6.8.8 MultiplicativeExpression
386   AsmType* AdditiveExpression();         // 6.8.9 AdditiveExpression
387   AsmType* ShiftExpression();            // 6.8.10 ShiftExpression
388   AsmType* RelationalExpression();       // 6.8.11 RelationalExpression
389   AsmType* EqualityExpression();         // 6.8.12 EqualityExpression
390   AsmType* BitwiseANDExpression();       // 6.8.13 BitwiseANDExpression
391   AsmType* BitwiseXORExpression();       // 6.8.14 BitwiseXORExpression
392   AsmType* BitwiseORExpression();        // 6.8.15 BitwiseORExpression
393   AsmType* ConditionalExpression();      // 6.8.16 ConditionalExpression
394   AsmType* ParenthesizedExpression();    // 6.8.17 ParenthesizedExpression
395   AsmType* ValidateCall();               // 6.9 ValidateCall
396   bool PeekCall();                       // 6.9 ValidateCall - helper
397   void ValidateHeapAccess();             // 6.10 ValidateHeapAccess
398   void ValidateFloatCoercion();          // 6.11 ValidateFloatCoercion
399 
400   // Used as part of {ForStatement}. Scans forward to the next `)` in order to
401   // skip over the third expression in a for-statement. This is one piece that
402   // makes this parser not be a pure single-pass.
403   void ScanToClosingParenthesis();
404 
405   // Used as part of {SwitchStatement}. Collects all case labels in the current
406   // switch-statement, then resets the scanner position. This is one piece that
407   // makes this parser not be a pure single-pass.
408   void GatherCases(ZoneVector<int32_t>* cases);
409 };
410 
411 }  // namespace wasm
412 }  // namespace internal
413 }  // namespace v8
414 
415 #endif  // V8_ASMJS_ASM_PARSER_H_
416