1 // Copyright 2015 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/macro-assembler.h"
6 #include "src/objects.h"
7 #include "src/v8.h"
8
9 #include "src/wasm/decoder.h"
10 #include "src/wasm/module-decoder.h"
11
12 namespace v8 {
13 namespace internal {
14 namespace wasm {
15
// Decoder tracing. In builds where DEBUG is non-zero, TRACE(...) forwards its
// arguments to PrintF when FLAG_trace_wasm_decoder is set; otherwise it
// expands to nothing and its arguments are never evaluated.
#if !DEBUG
#define TRACE(...)
#else
#define TRACE(...)                                    \
  do {                                                \
    if (FLAG_trace_wasm_decoder) PrintF(__VA_ARGS__); \
  } while (false)
#endif
24
25
26 // The main logic for decoding the bytes of a module.
27 class ModuleDecoder : public Decoder {
28 public:
// Creates a decoder over the bytes delimited by {module_start} and
// {module_end}. {zone} is kept for building function signatures and {asm_js}
// is later copied into the module environment used for verification. If the
// end precedes the start, an error is recorded and the readable range is
// collapsed to empty.
ModuleDecoder(Zone* zone, const byte* module_start, const byte* module_end,
              bool asm_js)
    : Decoder(module_start, module_end), module_zone(zone), asm_js_(asm_js) {
  result_.start = start_;
  if (start_ <= limit_) return;

  error(start_, "end is less than start");
  limit_ = start_;
}
38
onFirstError()39 virtual void onFirstError() {
40 pc_ = limit_; // On error, terminate section decoding loop.
41 }
42
43 // Decodes an entire module.
DecodeModule(WasmModule * module,bool verify_functions=true)44 ModuleResult DecodeModule(WasmModule* module, bool verify_functions = true) {
45 pc_ = start_;
46 module->module_start = start_;
47 module->module_end = limit_;
48 module->min_mem_size_log2 = 0;
49 module->max_mem_size_log2 = 0;
50 module->mem_export = false;
51 module->mem_external = false;
52 module->globals = new std::vector<WasmGlobal>();
53 module->signatures = new std::vector<FunctionSig*>();
54 module->functions = new std::vector<WasmFunction>();
55 module->data_segments = new std::vector<WasmDataSegment>();
56 module->function_table = new std::vector<uint16_t>();
57
58 bool sections[kMaxModuleSectionCode];
59 memset(sections, 0, sizeof(sections));
60
61 // Decode the module sections.
62 while (pc_ < limit_) {
63 TRACE("DecodeSection\n");
64 WasmSectionDeclCode section =
65 static_cast<WasmSectionDeclCode>(u8("section"));
66 // Each section should appear at most once.
67 if (section < kMaxModuleSectionCode) {
68 CheckForPreviousSection(sections, section, false);
69 sections[section] = true;
70 }
71
72 switch (section) {
73 case kDeclEnd:
74 // Terminate section decoding.
75 limit_ = pc_;
76 break;
77 case kDeclMemory:
78 module->min_mem_size_log2 = u8("min memory");
79 module->max_mem_size_log2 = u8("max memory");
80 module->mem_export = u8("export memory") != 0;
81 break;
82 case kDeclSignatures: {
83 int length;
84 uint32_t signatures_count = u32v(&length, "signatures count");
85 module->signatures->reserve(SafeReserve(signatures_count));
86 // Decode signatures.
87 for (uint32_t i = 0; i < signatures_count; i++) {
88 if (failed()) break;
89 TRACE("DecodeSignature[%d] module+%d\n", i,
90 static_cast<int>(pc_ - start_));
91 FunctionSig* s = sig(); // read function sig.
92 module->signatures->push_back(s);
93 }
94 break;
95 }
96 case kDeclFunctions: {
97 // Functions require a signature table first.
98 CheckForPreviousSection(sections, kDeclSignatures, true);
99 int length;
100 uint32_t functions_count = u32v(&length, "functions count");
101 module->functions->reserve(SafeReserve(functions_count));
102 // Set up module environment for verification.
103 ModuleEnv menv;
104 menv.module = module;
105 menv.globals_area = 0;
106 menv.mem_start = 0;
107 menv.mem_end = 0;
108 menv.function_code = nullptr;
109 menv.asm_js = asm_js_;
110 // Decode functions.
111 for (uint32_t i = 0; i < functions_count; i++) {
112 if (failed()) break;
113 TRACE("DecodeFunction[%d] module+%d\n", i,
114 static_cast<int>(pc_ - start_));
115
116 module->functions->push_back(
117 {nullptr, 0, 0, 0, 0, 0, 0, false, false});
118 WasmFunction* function = &module->functions->back();
119 DecodeFunctionInModule(module, function, false);
120 }
121 if (ok() && verify_functions) {
122 for (uint32_t i = 0; i < functions_count; i++) {
123 if (failed()) break;
124 WasmFunction* function = &module->functions->at(i);
125 if (!function->external) {
126 VerifyFunctionBody(i, &menv, function);
127 if (result_.failed())
128 error(result_.error_pc, result_.error_msg.get());
129 }
130 }
131 }
132 break;
133 }
134 case kDeclGlobals: {
135 int length;
136 uint32_t globals_count = u32v(&length, "globals count");
137 module->globals->reserve(SafeReserve(globals_count));
138 // Decode globals.
139 for (uint32_t i = 0; i < globals_count; i++) {
140 if (failed()) break;
141 TRACE("DecodeGlobal[%d] module+%d\n", i,
142 static_cast<int>(pc_ - start_));
143 module->globals->push_back({0, MachineType::Int32(), 0, false});
144 WasmGlobal* global = &module->globals->back();
145 DecodeGlobalInModule(global);
146 }
147 break;
148 }
149 case kDeclDataSegments: {
150 int length;
151 uint32_t data_segments_count = u32v(&length, "data segments count");
152 module->data_segments->reserve(SafeReserve(data_segments_count));
153 // Decode data segments.
154 for (uint32_t i = 0; i < data_segments_count; i++) {
155 if (failed()) break;
156 TRACE("DecodeDataSegment[%d] module+%d\n", i,
157 static_cast<int>(pc_ - start_));
158 module->data_segments->push_back({0, 0, 0});
159 WasmDataSegment* segment = &module->data_segments->back();
160 DecodeDataSegmentInModule(segment);
161 }
162 break;
163 }
164 case kDeclFunctionTable: {
165 // An indirect function table requires functions first.
166 CheckForPreviousSection(sections, kDeclFunctions, true);
167 int length;
168 uint32_t function_table_count = u32v(&length, "function table count");
169 module->function_table->reserve(SafeReserve(function_table_count));
170 // Decode function table.
171 for (uint32_t i = 0; i < function_table_count; i++) {
172 if (failed()) break;
173 TRACE("DecodeFunctionTable[%d] module+%d\n", i,
174 static_cast<int>(pc_ - start_));
175 uint16_t index = u16();
176 if (index >= module->functions->size()) {
177 error(pc_ - 2, "invalid function index");
178 break;
179 }
180 module->function_table->push_back(index);
181 }
182 break;
183 }
184 case kDeclWLL: {
185 // Reserved for experimentation by the Web Low-level Language project
186 // which is augmenting the binary encoding with source code meta
187 // information. This section does not affect the semantics of the code
188 // and can be ignored by the runtime. https://github.com/JSStats/wll
189 int length = 0;
190 uint32_t section_size = u32v(&length, "section size");
191 if (pc_ + section_size > limit_ || pc_ + section_size < pc_) {
192 error(pc_ - length, "invalid section size");
193 break;
194 }
195 pc_ += section_size;
196 break;
197 }
198 default:
199 error(pc_ - 1, nullptr, "unrecognized section 0x%02x", section);
200 break;
201 }
202 }
203
204 return toResult(module);
205 }
206
// Clamps an element count read from the input to a bounded capacity hint, so
// that a very large count cannot trigger a very large up-front reservation.
// Returns {count} unchanged when it is below the cap, and the cap otherwise.
uint32_t SafeReserve(uint32_t count) {
  const uint32_t kReserveCap = 20000;
  if (count >= kReserveCap) return kReserveCap;
  return count;
}
212
CheckForPreviousSection(bool * sections,WasmSectionDeclCode section,bool present)213 void CheckForPreviousSection(bool* sections, WasmSectionDeclCode section,
214 bool present) {
215 if (section >= kMaxModuleSectionCode) return;
216 if (sections[section] == present) return;
217 const char* name = "";
218 switch (section) {
219 case kDeclMemory:
220 name = "memory";
221 break;
222 case kDeclSignatures:
223 name = "signatures";
224 break;
225 case kDeclFunctions:
226 name = "function declaration";
227 break;
228 case kDeclGlobals:
229 name = "global variable";
230 break;
231 case kDeclDataSegments:
232 name = "data segment";
233 break;
234 case kDeclFunctionTable:
235 name = "function table";
236 break;
237 default:
238 name = "";
239 break;
240 }
241 if (present) {
242 error(pc_ - 1, nullptr, "required %s section missing", name);
243 } else {
244 error(pc_ - 1, nullptr, "%s section already present", name);
245 }
246 }
247
248 // Decodes a single anonymous function starting at {start_}.
DecodeSingleFunction(ModuleEnv * module_env,WasmFunction * function)249 FunctionResult DecodeSingleFunction(ModuleEnv* module_env,
250 WasmFunction* function) {
251 pc_ = start_;
252 function->sig = sig(); // read signature
253 function->name_offset = 0; // ---- name
254 function->code_start_offset = off(pc_ + 8); // ---- code start
255 function->code_end_offset = off(limit_); // ---- code end
256 function->local_int32_count = u16(); // read u16
257 function->local_int64_count = u16(); // read u16
258 function->local_float32_count = u16(); // read u16
259 function->local_float64_count = u16(); // read u16
260 function->exported = false; // ---- exported
261 function->external = false; // ---- external
262
263 if (ok()) VerifyFunctionBody(0, module_env, function);
264
265 FunctionResult result;
266 result.CopyFrom(result_); // Copy error code and location.
267 result.val = function;
268 return result;
269 }
270
271 // Decodes a single function signature at {start}.
DecodeFunctionSignature(const byte * start)272 FunctionSig* DecodeFunctionSignature(const byte* start) {
273 pc_ = start;
274 FunctionSig* result = sig();
275 return ok() ? result : nullptr;
276 }
277
278 private:
279 Zone* module_zone;
280 ModuleResult result_;
281 bool asm_js_;
282
off(const byte * ptr)283 uint32_t off(const byte* ptr) { return static_cast<uint32_t>(ptr - start_); }
284
285 // Decodes a single global entry inside a module starting at {pc_}.
DecodeGlobalInModule(WasmGlobal * global)286 void DecodeGlobalInModule(WasmGlobal* global) {
287 global->name_offset = string("global name");
288 global->type = mem_type();
289 global->offset = 0;
290 global->exported = u8("exported") != 0;
291 }
292
293 // Decodes a single function entry inside a module starting at {pc_}.
DecodeFunctionInModule(WasmModule * module,WasmFunction * function,bool verify_body=true)294 void DecodeFunctionInModule(WasmModule* module, WasmFunction* function,
295 bool verify_body = true) {
296 byte decl_bits = u8("function decl");
297
298 const byte* sigpos = pc_;
299 function->sig_index = u16("signature index");
300
301 if (function->sig_index >= module->signatures->size()) {
302 return error(sigpos, "invalid signature index");
303 } else {
304 function->sig = module->signatures->at(function->sig_index);
305 }
306
307 TRACE(" +%d <function attributes:%s%s%s%s%s>\n",
308 static_cast<int>(pc_ - start_),
309 decl_bits & kDeclFunctionName ? " name" : "",
310 decl_bits & kDeclFunctionImport ? " imported" : "",
311 decl_bits & kDeclFunctionLocals ? " locals" : "",
312 decl_bits & kDeclFunctionExport ? " exported" : "",
313 (decl_bits & kDeclFunctionImport) == 0 ? " body" : "");
314
315 if (decl_bits & kDeclFunctionName) {
316 function->name_offset = string("function name");
317 }
318
319 function->exported = decl_bits & kDeclFunctionExport;
320
321 // Imported functions have no locals or body.
322 if (decl_bits & kDeclFunctionImport) {
323 function->external = true;
324 return;
325 }
326
327 if (decl_bits & kDeclFunctionLocals) {
328 function->local_int32_count = u16("int32 count");
329 function->local_int64_count = u16("int64 count");
330 function->local_float32_count = u16("float32 count");
331 function->local_float64_count = u16("float64 count");
332 }
333
334 uint16_t size = u16("body size");
335 if (ok()) {
336 if ((pc_ + size) > limit_) {
337 return error(pc_, limit_,
338 "expected %d bytes for function body, fell off end", size);
339 }
340 function->code_start_offset = static_cast<uint32_t>(pc_ - start_);
341 function->code_end_offset = function->code_start_offset + size;
342 TRACE(" +%d %-20s: (%d bytes)\n", static_cast<int>(pc_ - start_),
343 "function body", size);
344 pc_ += size;
345 }
346 }
347
348 // Decodes a single data segment entry inside a module starting at {pc_}.
DecodeDataSegmentInModule(WasmDataSegment * segment)349 void DecodeDataSegmentInModule(WasmDataSegment* segment) {
350 segment->dest_addr =
351 u32("destination"); // TODO(titzer): check it's within the memory size.
352 segment->source_offset = offset("source offset");
353 segment->source_size =
354 u32("source size"); // TODO(titzer): check the size is reasonable.
355 segment->init = u8("init");
356 }
357
358 // Verifies the body (code) of a given function.
VerifyFunctionBody(uint32_t func_num,ModuleEnv * menv,WasmFunction * function)359 void VerifyFunctionBody(uint32_t func_num, ModuleEnv* menv,
360 WasmFunction* function) {
361 if (FLAG_trace_wasm_decode_time) {
362 // TODO(titzer): clean me up a bit.
363 OFStream os(stdout);
364 os << "Verifying WASM function:";
365 if (function->name_offset > 0) {
366 os << menv->module->GetName(function->name_offset);
367 }
368 os << std::endl;
369 }
370 FunctionEnv fenv;
371 fenv.module = menv;
372 fenv.sig = function->sig;
373 fenv.local_int32_count = function->local_int32_count;
374 fenv.local_int64_count = function->local_int64_count;
375 fenv.local_float32_count = function->local_float32_count;
376 fenv.local_float64_count = function->local_float64_count;
377 fenv.SumLocals();
378
379 TreeResult result =
380 VerifyWasmCode(&fenv, start_, start_ + function->code_start_offset,
381 start_ + function->code_end_offset);
382 if (result.failed()) {
383 // Wrap the error message from the function decoder.
384 std::ostringstream str;
385 str << "in function #" << func_num << ": ";
386 // TODO(titzer): add function name for the user?
387 str << result;
388 std::string strval = str.str();
389 const char* raw = strval.c_str();
390 size_t len = strlen(raw);
391 char* buffer = new char[len];
392 strncpy(buffer, raw, len);
393 buffer[len - 1] = 0;
394
395 // Copy error code and location.
396 result_.CopyFrom(result);
397 result_.error_msg.Reset(buffer);
398 }
399 }
400
401 // Reads a single 32-bit unsigned integer interpreted as an offset, checking
402 // the offset is within bounds and advances.
offset(const char * name=nullptr)403 uint32_t offset(const char* name = nullptr) {
404 uint32_t offset = u32(name ? name : "offset");
405 if (offset > static_cast<uint32_t>(limit_ - start_)) {
406 error(pc_ - sizeof(uint32_t), "offset out of bounds of module");
407 }
408 return offset;
409 }
410
411 // Reads a single 32-bit unsigned integer interpreted as an offset into the
412 // data and validating the string there and advances.
string(const char * name=nullptr)413 uint32_t string(const char* name = nullptr) {
414 return offset(name ? name : "string"); // TODO(titzer): validate string
415 }
416
417 // Reads a single 8-bit integer, interpreting it as a local type.
local_type()418 LocalType local_type() {
419 byte val = u8("local type");
420 LocalTypeCode t = static_cast<LocalTypeCode>(val);
421 switch (t) {
422 case kLocalVoid:
423 return kAstStmt;
424 case kLocalI32:
425 return kAstI32;
426 case kLocalI64:
427 return kAstI64;
428 case kLocalF32:
429 return kAstF32;
430 case kLocalF64:
431 return kAstF64;
432 default:
433 error(pc_ - 1, "invalid local type");
434 return kAstStmt;
435 }
436 }
437
438 // Reads a single 8-bit integer, interpreting it as a memory type.
mem_type()439 MachineType mem_type() {
440 byte val = u8("memory type");
441 MemTypeCode t = static_cast<MemTypeCode>(val);
442 switch (t) {
443 case kMemI8:
444 return MachineType::Int8();
445 case kMemU8:
446 return MachineType::Uint8();
447 case kMemI16:
448 return MachineType::Int16();
449 case kMemU16:
450 return MachineType::Uint16();
451 case kMemI32:
452 return MachineType::Int32();
453 case kMemU32:
454 return MachineType::Uint32();
455 case kMemI64:
456 return MachineType::Int64();
457 case kMemU64:
458 return MachineType::Uint64();
459 case kMemF32:
460 return MachineType::Float32();
461 case kMemF64:
462 return MachineType::Float64();
463 default:
464 error(pc_ - 1, "invalid memory type");
465 return MachineType::None();
466 }
467 }
468
469 // Parses an inline function signature.
sig()470 FunctionSig* sig() {
471 byte count = u8("param count");
472 LocalType ret = local_type();
473 FunctionSig::Builder builder(module_zone, ret == kAstStmt ? 0 : 1, count);
474 if (ret != kAstStmt) builder.AddReturn(ret);
475
476 for (int i = 0; i < count; i++) {
477 LocalType param = local_type();
478 if (param == kAstStmt) error(pc_ - 1, "invalid void parameter type");
479 builder.AddParam(param);
480 }
481 return builder.Build();
482 }
483 };
484
485
486 // Helpers for nice error messages.
487 class ModuleError : public ModuleResult {
488 public:
ModuleError(const char * msg)489 explicit ModuleError(const char* msg) {
490 error_code = kError;
491 size_t len = strlen(msg) + 1;
492 char* result = new char[len];
493 strncpy(result, msg, len);
494 result[len - 1] = 0;
495 error_msg.Reset(result);
496 }
497 };
498
499
500 // Helpers for nice error messages.
501 class FunctionError : public FunctionResult {
502 public:
FunctionError(const char * msg)503 explicit FunctionError(const char* msg) {
504 error_code = kError;
505 size_t len = strlen(msg) + 1;
506 char* result = new char[len];
507 strncpy(result, msg, len);
508 result[len - 1] = 0;
509 error_msg.Reset(result);
510 }
511 };
512
513
DecodeWasmModule(Isolate * isolate,Zone * zone,const byte * module_start,const byte * module_end,bool verify_functions,bool asm_js)514 ModuleResult DecodeWasmModule(Isolate* isolate, Zone* zone,
515 const byte* module_start, const byte* module_end,
516 bool verify_functions, bool asm_js) {
517 size_t size = module_end - module_start;
518 if (module_start > module_end) return ModuleError("start > end");
519 if (size >= kMaxModuleSize) return ModuleError("size > maximum module size");
520 WasmModule* module = new WasmModule();
521 ModuleDecoder decoder(zone, module_start, module_end, asm_js);
522 return decoder.DecodeModule(module, verify_functions);
523 }
524
525
DecodeWasmSignatureForTesting(Zone * zone,const byte * start,const byte * end)526 FunctionSig* DecodeWasmSignatureForTesting(Zone* zone, const byte* start,
527 const byte* end) {
528 ModuleDecoder decoder(zone, start, end, false);
529 return decoder.DecodeFunctionSignature(start);
530 }
531
532
DecodeWasmFunction(Isolate * isolate,Zone * zone,ModuleEnv * module_env,const byte * function_start,const byte * function_end)533 FunctionResult DecodeWasmFunction(Isolate* isolate, Zone* zone,
534 ModuleEnv* module_env,
535 const byte* function_start,
536 const byte* function_end) {
537 size_t size = function_end - function_start;
538 if (function_start > function_end) return FunctionError("start > end");
539 if (size > kMaxFunctionSize)
540 return FunctionError("size > maximum function size");
541 WasmFunction* function = new WasmFunction();
542 ModuleDecoder decoder(zone, function_start, function_end, false);
543 return decoder.DecodeSingleFunction(module_env, function);
544 }
545 } // namespace wasm
546 } // namespace internal
547 } // namespace v8
548