• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_ASMJS_ASM_PARSER_H_
6 #define V8_ASMJS_ASM_PARSER_H_
7 
8 #include <memory>
9 
10 #include "src/asmjs/asm-scanner.h"
11 #include "src/asmjs/asm-types.h"
12 #include "src/base/enum-set.h"
13 #include "src/base/vector.h"
14 #include "src/wasm/wasm-module-builder.h"
15 #include "src/zone/zone-containers.h"
16 
17 namespace v8 {
18 namespace internal {
19 
20 class Utf16CharacterStream;
21 
22 namespace wasm {
23 
24 // A custom parser + validator + wasm converter for asm.js:
25 // http://asmjs.org/spec/latest/
26 // This parser intentionally avoids the portions of JavaScript parsing
27 // that are not required to determine if code is valid asm.js code.
28 // * It is mostly one pass.
29 // * It bails out on unexpected input.
30 // * It assumes strict ordering insofar as permitted by asm.js validation rules.
31 // * It relies on a custom scanner that provides de-duped identifiers in two
32 //   scopes (local + module wide).
33 class AsmJsParser {
34  public:
35   // clang-format off
36   enum StandardMember {
37     kInfinity,
38     kNaN,
39 #define V(_unused1, name, _unused2, _unused3) kMath##name,
40     STDLIB_MATH_FUNCTION_LIST(V)
41 #undef V
42 #define V(name, _unused1) kMath##name,
43     STDLIB_MATH_VALUE_LIST(V)
44 #undef V
45 #define V(name, _unused1, _unused2, _unused3) k##name,
46     STDLIB_ARRAY_TYPE_LIST(V)
47 #undef V
48   };
49   // clang-format on
50 
51   using StdlibSet = base::EnumSet<StandardMember, uint64_t>;
52 
53   explicit AsmJsParser(Zone* zone, uintptr_t stack_limit,
54                        Utf16CharacterStream* stream);
55   bool Run();
failure_message()56   const char* failure_message() const { return failure_message_; }
failure_location()57   int failure_location() const { return failure_location_; }
module_builder()58   WasmModuleBuilder* module_builder() { return module_builder_; }
stdlib_uses()59   const StdlibSet* stdlib_uses() const { return &stdlib_uses_; }
60 
61  private:
62   // clang-format off
63   enum class VarKind {
64     kUnused,
65     kLocal,
66     kGlobal,
67     kSpecial,
68     kFunction,
69     kTable,
70     kImportedFunction,
71 #define V(_unused0, Name, _unused1, _unused2) kMath##Name,
72     STDLIB_MATH_FUNCTION_LIST(V)
73 #undef V
74 #define V(Name, _unused1) kMath##Name,
75     STDLIB_MATH_VALUE_LIST(V)
76 #undef V
77   };
78   // clang-format on
79 
80   // A single import in asm.js can require multiple imports in wasm, if the
81   // function is used with different signatures. {cache} keeps the wasm
82   // imports for the single asm.js import of name {function_name}.
83   struct FunctionImportInfo {
84     base::Vector<const char> function_name;
85     ZoneUnorderedMap<FunctionSig, uint32_t> cache;
86 
87     // Constructor.
FunctionImportInfoFunctionImportInfo88     FunctionImportInfo(base::Vector<const char> name, Zone* zone)
89         : function_name(name), cache(zone) {}
90   };
91 
92   struct VarInfo {
93     AsmType* type = AsmType::None();
94     WasmFunctionBuilder* function_builder = nullptr;
95     FunctionImportInfo* import = nullptr;
96     uint32_t mask = 0;
97     uint32_t index = 0;
98     VarKind kind = VarKind::kUnused;
99     bool mutable_variable = true;
100     bool function_defined = false;
101   };
102 
103   struct GlobalImport {
104     base::Vector<const char> import_name;
105     ValueType value_type;
106     VarInfo* var_info;
107   };
108 
109   // Distinguish different kinds of blocks participating in {block_stack}. Each
110   // entry on that stack represents one block in the wasm code, and determines
111   // which block 'break' and 'continue' target in the current context:
112   //  - kRegular: The target of a 'break' (with & without identifier).
113   //              Pushed by an IterationStatement and a SwitchStatement.
114   //  - kLoop   : The target of a 'continue' (with & without identifier).
115   //              Pushed by an IterationStatement.
116   //  - kNamed  : The target of a 'break' with a specific identifier.
117   //              Pushed by a BlockStatement.
118   //  - kOther  : Only used for internal blocks, can never be targeted.
119   enum class BlockKind { kRegular, kLoop, kNamed, kOther };
120 
121   // One entry in the {block_stack}, see {BlockKind} above for details. Blocks
122   // without a label have {kTokenNone} set as their label.
123   struct BlockInfo {
124     BlockKind kind;
125     AsmJsScanner::token_t label;
126   };
127 
128   // Helper class to make {TempVariable} safe for nesting.
129   class TemporaryVariableScope;
130 
131   template <typename T>
132   class CachedVectors {
133    public:
CachedVectors(Zone * zone)134     explicit CachedVectors(Zone* zone) : reusable_vectors_(zone) {}
135 
zone()136     Zone* zone() const { return reusable_vectors_.get_allocator().zone(); }
137 
fill(ZoneVector<T> * vec)138     inline void fill(ZoneVector<T>* vec) {
139       if (reusable_vectors_.empty()) return;
140       reusable_vectors_.back().swap(*vec);
141       reusable_vectors_.pop_back();
142       vec->clear();
143     }
144 
reuse(ZoneVector<T> * vec)145     inline void reuse(ZoneVector<T>* vec) {
146       reusable_vectors_.emplace_back(std::move(*vec));
147     }
148 
149    private:
150     ZoneVector<ZoneVector<T>> reusable_vectors_;
151   };
152 
153   template <typename T>
154   class CachedVector final : public ZoneVector<T> {
155    public:
CachedVector(CachedVectors<T> * cache)156     explicit CachedVector(CachedVectors<T>* cache)
157         : ZoneVector<T>(cache->zone()), cache_(cache) {
158       cache->fill(this);
159     }
~CachedVector()160     ~CachedVector() { cache_->reuse(this); }
161 
162    private:
163     CachedVectors<T>* cache_;
164   };
165 
166   Zone* zone_;
167   AsmJsScanner scanner_;
168   WasmModuleBuilder* module_builder_;
169   WasmFunctionBuilder* current_function_builder_;
170   AsmType* return_type_ = nullptr;
171   uintptr_t stack_limit_;
172   StdlibSet stdlib_uses_;
173   base::Vector<VarInfo> global_var_info_;
174   base::Vector<VarInfo> local_var_info_;
175   size_t num_globals_ = 0;
176 
177   CachedVectors<ValueType> cached_valuetype_vectors_{zone_};
178   CachedVectors<AsmType*> cached_asm_type_p_vectors_{zone_};
179   CachedVectors<AsmJsScanner::token_t> cached_token_t_vectors_{zone_};
180   CachedVectors<int32_t> cached_int_vectors_{zone_};
181 
182   int function_temp_locals_offset_;
183   int function_temp_locals_used_;
184   int function_temp_locals_depth_;
185 
186   // Error Handling related
187   bool failed_ = false;
188   const char* failure_message_;
189   int failure_location_ = kNoSourcePosition;
190 
191   // Module Related.
192   AsmJsScanner::token_t stdlib_name_ = kTokenNone;
193   AsmJsScanner::token_t foreign_name_ = kTokenNone;
194   AsmJsScanner::token_t heap_name_ = kTokenNone;
195 
196   static const AsmJsScanner::token_t kTokenNone = 0;
197 
198   // Track if parsing a heap assignment.
199   bool inside_heap_assignment_ = false;
200   AsmType* heap_access_type_ = nullptr;
201 
202   ZoneVector<BlockInfo> block_stack_;
203 
204   // Types used for stdlib function and their set up.
205   AsmType* stdlib_dq2d_;
206   AsmType* stdlib_dqdq2d_;
207   AsmType* stdlib_i2s_;
208   AsmType* stdlib_ii2s_;
209   AsmType* stdlib_minmax_;
210   AsmType* stdlib_abs_;
211   AsmType* stdlib_ceil_like_;
212   AsmType* stdlib_fround_;
213 
214   // When making calls, the return type is needed to lookup signatures.
215   // For `+callsite(..)` or `fround(callsite(..))` use this value to pass
216   // along the coercion.
217   AsmType* call_coercion_ = nullptr;
218 
219   // The source position associated with the above {call_coercion}.
220   size_t call_coercion_position_;
221 
222   // When making calls, the coercion can also appear in the source stream
223   // syntactically "behind" the call site. For `callsite(..)|0` use this
224   // value to flag that such a coercion must happen.
225   AsmType* call_coercion_deferred_ = nullptr;
226 
227   // The source position at which requesting a deferred coercion via the
228   // aforementioned {call_coercion_deferred} is allowed.
229   size_t call_coercion_deferred_position_;
230 
231   // The code position of the last heap access shift by an immediate value.
232   // For `heap[expr >> value:NumericLiteral]` this indicates from where to
233   // delete code when the expression is used as part of a valid heap access.
234   // Will be set to {kNoHeapAccessShift} if heap access shift wasn't matched.
235   size_t heap_access_shift_position_;
236   uint32_t heap_access_shift_value_;
237   static const size_t kNoHeapAccessShift = -1;
238 
239   // Used to track the last label we've seen so it can be matched to later
240   // statements it's attached to.
241   AsmJsScanner::token_t pending_label_ = kTokenNone;
242 
243   // Global imports. The list of imported variables that are copied during
244   // module instantiation into a corresponding global variable.
245   ZoneLinkedList<GlobalImport> global_imports_;
246 
zone()247   Zone* zone() { return zone_; }
248 
Peek(AsmJsScanner::token_t token)249   inline bool Peek(AsmJsScanner::token_t token) {
250     return scanner_.Token() == token;
251   }
252 
PeekForZero()253   inline bool PeekForZero() {
254     return (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0);
255   }
256 
Check(AsmJsScanner::token_t token)257   inline bool Check(AsmJsScanner::token_t token) {
258     if (scanner_.Token() == token) {
259       scanner_.Next();
260       return true;
261     } else {
262       return false;
263     }
264   }
265 
CheckForZero()266   inline bool CheckForZero() {
267     if (scanner_.IsUnsigned() && scanner_.AsUnsigned() == 0) {
268       scanner_.Next();
269       return true;
270     } else {
271       return false;
272     }
273   }
274 
CheckForDouble(double * value)275   inline bool CheckForDouble(double* value) {
276     if (scanner_.IsDouble()) {
277       *value = scanner_.AsDouble();
278       scanner_.Next();
279       return true;
280     } else {
281       return false;
282     }
283   }
284 
CheckForUnsigned(uint32_t * value)285   inline bool CheckForUnsigned(uint32_t* value) {
286     if (scanner_.IsUnsigned()) {
287       *value = scanner_.AsUnsigned();
288       scanner_.Next();
289       return true;
290     } else {
291       return false;
292     }
293   }
294 
CheckForUnsignedBelow(uint32_t limit,uint32_t * value)295   inline bool CheckForUnsignedBelow(uint32_t limit, uint32_t* value) {
296     if (scanner_.IsUnsigned() && scanner_.AsUnsigned() < limit) {
297       *value = scanner_.AsUnsigned();
298       scanner_.Next();
299       return true;
300     } else {
301       return false;
302     }
303   }
304 
Consume()305   inline AsmJsScanner::token_t Consume() {
306     AsmJsScanner::token_t ret = scanner_.Token();
307     scanner_.Next();
308     return ret;
309   }
310 
311   void SkipSemicolon();
312 
313   VarInfo* GetVarInfo(AsmJsScanner::token_t token);
314   uint32_t VarIndex(VarInfo* info);
315   void DeclareGlobal(VarInfo* info, bool mutable_variable, AsmType* type,
316                      ValueType vtype, WasmInitExpr init = WasmInitExpr());
317   void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
318   void AddGlobalImport(base::Vector<const char> name, AsmType* type,
319                        ValueType vtype, bool mutable_variable, VarInfo* info);
320 
321   // Allocates a temporary local variable. The given {index} is absolute within
322   // the function body, consider using {TemporaryVariableScope} when nesting.
323   uint32_t TempVariable(int index);
324 
325   // Preserves a copy of the scanner's current identifier string in the zone.
326   base::Vector<const char> CopyCurrentIdentifierString();
327 
328   // Use to set up block stack layers (including synthetic ones for if-else).
329   // Begin/Loop/End below are implemented with these plus code generation.
330   void BareBegin(BlockKind kind, AsmJsScanner::token_t label = 0);
331   void BareEnd();
332   int FindContinueLabelDepth(AsmJsScanner::token_t label);
333   int FindBreakLabelDepth(AsmJsScanner::token_t label);
334 
335   // Use to set up actual wasm blocks/loops.
336   void Begin(AsmJsScanner::token_t label = 0);
337   void Loop(AsmJsScanner::token_t label = 0);
338   void End();
339 
340   void InitializeStdlibTypes();
341 
342   FunctionSig* ConvertSignature(AsmType* return_type,
343                                 const ZoneVector<AsmType*>& params);
344 
345   void ValidateModule();            // 6.1 ValidateModule
346   void ValidateModuleParameters();  // 6.1 ValidateModule - parameters
347   void ValidateModuleVars();        // 6.1 ValidateModule - variables
348   void ValidateModuleVar(bool mutable_variable);
349   void ValidateModuleVarImport(VarInfo* info, bool mutable_variable);
350   void ValidateModuleVarStdlib(VarInfo* info);
351   void ValidateModuleVarNewStdlib(VarInfo* info);
352   void ValidateModuleVarFromGlobal(VarInfo* info, bool mutable_variable);
353 
354   void ValidateExport();         // 6.2 ValidateExport
355   void ValidateFunctionTable();  // 6.3 ValidateFunctionTable
356   void ValidateFunction();       // 6.4 ValidateFunction
357   void ValidateFunctionParams(ZoneVector<AsmType*>* params);
358   void ValidateFunctionLocals(size_t param_count,
359                               ZoneVector<ValueType>* locals);
360   void ValidateStatement();              // 6.5 ValidateStatement
361   void Block();                          // 6.5.1 Block
362   void ExpressionStatement();            // 6.5.2 ExpressionStatement
363   void EmptyStatement();                 // 6.5.3 EmptyStatement
364   void IfStatement();                    // 6.5.4 IfStatement
365   void ReturnStatement();                // 6.5.5 ReturnStatement
366   bool IterationStatement();             // 6.5.6 IterationStatement
367   void WhileStatement();                 // 6.5.6 IterationStatement - while
368   void DoStatement();                    // 6.5.6 IterationStatement - do
369   void ForStatement();                   // 6.5.6 IterationStatement - for
370   void BreakStatement();                 // 6.5.7 BreakStatement
371   void ContinueStatement();              // 6.5.8 ContinueStatement
372   void LabelledStatement();              // 6.5.9 LabelledStatement
373   void SwitchStatement();                // 6.5.10 SwitchStatement
374   void ValidateCase();                   // 6.6. ValidateCase
375   void ValidateDefault();                // 6.7 ValidateDefault
376   AsmType* ValidateExpression();         // 6.8 ValidateExpression
377   AsmType* Expression(AsmType* expect);  // 6.8.1 Expression
378   AsmType* NumericLiteral();             // 6.8.2 NumericLiteral
379   AsmType* Identifier();                 // 6.8.3 Identifier
380   AsmType* CallExpression();             // 6.8.4 CallExpression
381   AsmType* MemberExpression();           // 6.8.5 MemberExpression
382   AsmType* AssignmentExpression();       // 6.8.6 AssignmentExpression
383   AsmType* UnaryExpression();            // 6.8.7 UnaryExpression
384   AsmType* MultiplicativeExpression();   // 6.8.8 MultiplicativeExpression
385   AsmType* AdditiveExpression();         // 6.8.9 AdditiveExpression
386   AsmType* ShiftExpression();            // 6.8.10 ShiftExpression
387   AsmType* RelationalExpression();       // 6.8.11 RelationalExpression
388   AsmType* EqualityExpression();         // 6.8.12 EqualityExpression
389   AsmType* BitwiseANDExpression();       // 6.8.13 BitwiseANDExpression
390   AsmType* BitwiseXORExpression();       // 6.8.14 BitwiseXORExpression
391   AsmType* BitwiseORExpression();        // 6.8.15 BitwiseORExpression
392   AsmType* ConditionalExpression();      // 6.8.16 ConditionalExpression
393   AsmType* ParenthesizedExpression();    // 6.8.17 ParenthesizedExpression
394   AsmType* ValidateCall();               // 6.9 ValidateCall
395   bool PeekCall();                       // 6.9 ValidateCall - helper
396   void ValidateHeapAccess();             // 6.10 ValidateHeapAccess
397   void ValidateFloatCoercion();          // 6.11 ValidateFloatCoercion
398 
399   // Used as part of {ForStatement}. Scans forward to the next `)` in order to
400   // skip over the third expression in a for-statement. This is one piece that
401   // makes this parser not be a pure single-pass.
402   void ScanToClosingParenthesis();
403 
404   // Used as part of {SwitchStatement}. Collects all case labels in the current
405   // switch-statement, then resets the scanner position. This is one piece that
406   // makes this parser not be a pure single-pass.
407   void GatherCases(ZoneVector<int32_t>* cases);
408 };
409 
410 }  // namespace wasm
411 }  // namespace internal
412 }  // namespace v8
413 
414 #endif  // V8_ASMJS_ASM_PARSER_H_
415