• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if !V8_ENABLE_WEBASSEMBLY
6 #error This header should only be included if WebAssembly is enabled.
7 #endif  // !V8_ENABLE_WEBASSEMBLY
8 
9 #ifndef V8_WASM_DECODER_H_
10 #define V8_WASM_DECODER_H_
11 
12 #include <cinttypes>
13 #include <cstdarg>
14 #include <memory>
15 
16 #include "src/base/compiler-specific.h"
17 #include "src/base/memory.h"
18 #include "src/base/strings.h"
19 #include "src/base/vector.h"
20 #include "src/codegen/signature.h"
21 #include "src/flags/flags.h"
22 #include "src/wasm/wasm-opcodes.h"
23 #include "src/wasm/wasm-result.h"
24 #include "src/zone/zone-containers.h"
25 
26 namespace v8 {
27 namespace internal {
28 namespace wasm {
29 
// Forwards the printf-style arguments to PrintF, but only if
// FLAG_trace_wasm_decoder is set and {cond} evaluates to true.
#define TRACE_IF(cond, ...)                                     \
  do {                                                          \
    if (FLAG_trace_wasm_decoder && (cond)) PrintF(__VA_ARGS__); \
  } while (false)
// Like TRACE_IF without an extra condition: prints whenever
// FLAG_trace_wasm_decoder is set.
#define TRACE(...) TRACE_IF(true, __VA_ARGS__)
38 
39 // A {DecodeResult} only stores the failure / success status, but no data.
40 using DecodeResult = VoidResult;
41 
42 // A helper utility to decode bytes, integers, fields, varints, etc, from
43 // a buffer of bytes.
44 class Decoder {
45  public:
46   // {ValidateFlag} can be used in a boolean manner ({if (!validate) ...}).
47   enum ValidateFlag : int8_t {
48     kNoValidation = 0,   // Don't run validation, assume valid input.
49     kBooleanValidation,  // Run validation but only store a generic error.
50     kFullValidation      // Run full validation with error message and location.
51   };
52 
53   enum TraceFlag : bool { kTrace = true, kNoTrace = false };
54 
55   Decoder(const byte* start, const byte* end, uint32_t buffer_offset = 0)
Decoder(start,start,end,buffer_offset)56       : Decoder(start, start, end, buffer_offset) {}
57   explicit Decoder(const base::Vector<const byte> bytes,
58                    uint32_t buffer_offset = 0)
59       : Decoder(bytes.begin(), bytes.begin() + bytes.length(), buffer_offset) {}
60   Decoder(const byte* start, const byte* pc, const byte* end,
61           uint32_t buffer_offset = 0)
start_(start)62       : start_(start), pc_(pc), end_(end), buffer_offset_(buffer_offset) {
63     DCHECK_LE(start, pc);
64     DCHECK_LE(pc, end);
65     DCHECK_EQ(static_cast<uint32_t>(end - start), end - start);
66   }
67 
68   virtual ~Decoder() = default;
69 
70   // Ensures there are at least {length} bytes left to read, starting at {pc}.
validate_size(const byte * pc,uint32_t length,const char * msg)71   bool validate_size(const byte* pc, uint32_t length, const char* msg) {
72     DCHECK_LE(start_, pc);
73     if (V8_UNLIKELY(pc > end_ || length > static_cast<uint32_t>(end_ - pc))) {
74       error(pc, msg);
75       return false;
76     }
77     return true;
78   }
79 
80   // Reads an 8-bit unsigned integer.
81   template <ValidateFlag validate>
82   uint8_t read_u8(const byte* pc, const char* msg = "expected 1 byte") {
83     return read_little_endian<uint8_t, validate>(pc, msg);
84   }
85 
86   // Reads a 16-bit unsigned integer (little endian).
87   template <ValidateFlag validate>
88   uint16_t read_u16(const byte* pc, const char* msg = "expected 2 bytes") {
89     return read_little_endian<uint16_t, validate>(pc, msg);
90   }
91 
92   // Reads a 32-bit unsigned integer (little endian).
93   template <ValidateFlag validate>
94   uint32_t read_u32(const byte* pc, const char* msg = "expected 4 bytes") {
95     return read_little_endian<uint32_t, validate>(pc, msg);
96   }
97 
98   // Reads a 64-bit unsigned integer (little endian).
99   template <ValidateFlag validate>
100   uint64_t read_u64(const byte* pc, const char* msg = "expected 8 bytes") {
101     return read_little_endian<uint64_t, validate>(pc, msg);
102   }
103 
104   // Reads a variable-length unsigned integer (little endian).
105   template <ValidateFlag validate>
106   uint32_t read_u32v(const byte* pc, uint32_t* length,
107                      const char* name = "LEB32") {
108     return read_leb<uint32_t, validate, kNoTrace>(pc, length, name);
109   }
110 
111   // Reads a variable-length signed integer (little endian).
112   template <ValidateFlag validate>
113   int32_t read_i32v(const byte* pc, uint32_t* length,
114                     const char* name = "signed LEB32") {
115     return read_leb<int32_t, validate, kNoTrace>(pc, length, name);
116   }
117 
118   // Reads a variable-length unsigned integer (little endian).
119   template <ValidateFlag validate>
120   uint64_t read_u64v(const byte* pc, uint32_t* length,
121                      const char* name = "LEB64") {
122     return read_leb<uint64_t, validate, kNoTrace>(pc, length, name);
123   }
124 
125   // Reads a variable-length signed integer (little endian).
126   template <ValidateFlag validate>
127   int64_t read_i64v(const byte* pc, uint32_t* length,
128                     const char* name = "signed LEB64") {
129     return read_leb<int64_t, validate, kNoTrace>(pc, length, name);
130   }
131 
132   // Reads a variable-length 33-bit signed integer (little endian).
133   template <ValidateFlag validate>
134   int64_t read_i33v(const byte* pc, uint32_t* length,
135                     const char* name = "signed LEB33") {
136     return read_leb<int64_t, validate, kNoTrace, 33>(pc, length, name);
137   }
138 
139   // Convenient overload for callers who don't care about length.
140   template <ValidateFlag validate>
read_prefixed_opcode(const byte * pc)141   WasmOpcode read_prefixed_opcode(const byte* pc) {
142     uint32_t len;
143     return read_prefixed_opcode<validate>(pc, &len);
144   }
145 
146   // Reads a prefixed-opcode, possibly with variable-length index.
147   // `length` is set to the number of bytes that make up this opcode,
148   // *including* the prefix byte. For most opcodes, it will be 2.
149   template <ValidateFlag validate>
150   WasmOpcode read_prefixed_opcode(const byte* pc, uint32_t* length,
151                                   const char* name = "prefixed opcode") {
152     uint32_t index;
153 
154     // Prefixed opcodes all use LEB128 encoding.
155     index = read_u32v<validate>(pc + 1, length, "prefixed opcode index");
156     *length += 1;  // Prefix byte.
157     // Only support opcodes that go up to 0xFF (when decoded). Anything
158     // bigger will need 1 more byte, and the '<< 8' below will be wrong.
159     if (validate && V8_UNLIKELY(index > 0xff)) {
160       errorf(pc, "Invalid prefixed opcode %d", index);
161       // If size validation fails.
162       index = 0;
163       *length = 0;
164     }
165 
166     return static_cast<WasmOpcode>((*pc) << 8 | index);
167   }
168 
169   // Reads a 8-bit unsigned integer (byte) and advances {pc_}.
170   uint8_t consume_u8(const char* name = "uint8_t") {
171     return consume_little_endian<uint8_t>(name);
172   }
173 
174   // Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
175   uint16_t consume_u16(const char* name = "uint16_t") {
176     return consume_little_endian<uint16_t>(name);
177   }
178 
179   // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
180   uint32_t consume_u32(const char* name = "uint32_t") {
181     return consume_little_endian<uint32_t>(name);
182   }
183 
184   // Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}.
185   uint32_t consume_u32v(const char* name = nullptr) {
186     uint32_t length = 0;
187     uint32_t result =
188         read_leb<uint32_t, kFullValidation, kTrace>(pc_, &length, name);
189     pc_ += length;
190     return result;
191   }
192 
193   // Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}.
194   int32_t consume_i32v(const char* name = nullptr) {
195     uint32_t length = 0;
196     int32_t result =
197         read_leb<int32_t, kFullValidation, kTrace>(pc_, &length, name);
198     pc_ += length;
199     return result;
200   }
201 
202   // Reads a LEB128 variable-length unsigned 64-bit integer and advances {pc_}.
203   uint64_t consume_u64v(const char* name = nullptr) {
204     uint32_t length = 0;
205     uint64_t result =
206         read_leb<uint64_t, kFullValidation, kTrace>(pc_, &length, name);
207     pc_ += length;
208     return result;
209   }
210 
211   // Reads a LEB128 variable-length signed 64-bit integer and advances {pc_}.
212   int64_t consume_i64v(const char* name = nullptr) {
213     uint32_t length = 0;
214     int64_t result =
215         read_leb<int64_t, kFullValidation, kTrace>(pc_, &length, name);
216     pc_ += length;
217     return result;
218   }
219 
220   // Consume {size} bytes and send them to the bit bucket, advancing {pc_}.
221   void consume_bytes(uint32_t size, const char* name = "skip") {
222     // Only trace if the name is not null.
223     TRACE_IF(name, "  +%u  %-20s: %u bytes\n", pc_offset(), name, size);
224     if (checkAvailable(size)) {
225       pc_ += size;
226     } else {
227       pc_ = end_;
228     }
229   }
230 
231   // Check that at least {size} bytes exist between {pc_} and {end_}.
checkAvailable(uint32_t size)232   bool checkAvailable(uint32_t size) {
233     DCHECK_LE(pc_, end_);
234     if (V8_UNLIKELY(size > static_cast<uint32_t>(end_ - pc_))) {
235       errorf(pc_, "expected %u bytes, fell off end", size);
236       return false;
237     }
238     return true;
239   }
240 
241   // Use this for "boolean validation", i.e. if the error message is not used
242   // anyway.
MarkError()243   void V8_NOINLINE MarkError() {
244     if (!ok()) return;
245     error_ = {0, "validation failed"};
246     onFirstError();
247   }
248 
249   // Do not inline error methods. This has measurable impact on validation time,
250   // see https://crbug.com/910432.
error(const char * msg)251   void V8_NOINLINE error(const char* msg) { errorf(pc_offset(), "%s", msg); }
error(const uint8_t * pc,const char * msg)252   void V8_NOINLINE error(const uint8_t* pc, const char* msg) {
253     errorf(pc_offset(pc), "%s", msg);
254   }
error(uint32_t offset,const char * msg)255   void V8_NOINLINE error(uint32_t offset, const char* msg) {
256     errorf(offset, "%s", msg);
257   }
258 
errorf(const char * format,...)259   void V8_NOINLINE PRINTF_FORMAT(2, 3) errorf(const char* format, ...) {
260     va_list args;
261     va_start(args, format);
262     verrorf(pc_offset(), format, args);
263     va_end(args);
264   }
265 
266   void V8_NOINLINE PRINTF_FORMAT(3, 4)
errorf(uint32_t offset,const char * format,...)267       errorf(uint32_t offset, const char* format, ...) {
268     va_list args;
269     va_start(args, format);
270     verrorf(offset, format, args);
271     va_end(args);
272   }
273 
274   void V8_NOINLINE PRINTF_FORMAT(3, 4)
errorf(const uint8_t * pc,const char * format,...)275       errorf(const uint8_t* pc, const char* format, ...) {
276     va_list args;
277     va_start(args, format);
278     verrorf(pc_offset(pc), format, args);
279     va_end(args);
280   }
281 
282   // Behavior triggered on first error, overridden in subclasses.
onFirstError()283   virtual void onFirstError() {}
284 
285   // Debugging helper to print a bytes range as hex bytes.
traceByteRange(const byte * start,const byte * end)286   void traceByteRange(const byte* start, const byte* end) {
287     DCHECK_LE(start, end);
288     for (const byte* p = start; p < end; ++p) TRACE("%02x ", *p);
289   }
290 
291   // Debugging helper to print bytes up to the end.
traceOffEnd()292   void traceOffEnd() {
293     traceByteRange(pc_, end_);
294     TRACE("<end>\n");
295   }
296 
297   // Converts the given value to a {Result}, copying the error if necessary.
298   template <typename T, typename U = typename std::remove_reference<T>::type>
toResult(T && val)299   Result<U> toResult(T&& val) {
300     if (failed()) {
301       TRACE("Result error: %s\n", error_.message().c_str());
302       return Result<U>{error_};
303     }
304     return Result<U>{std::forward<T>(val)};
305   }
306 
307   // Resets the boundaries of this decoder.
308   void Reset(const byte* start, const byte* end, uint32_t buffer_offset = 0) {
309     DCHECK_LE(start, end);
310     DCHECK_EQ(static_cast<uint32_t>(end - start), end - start);
311     start_ = start;
312     pc_ = start;
313     end_ = end;
314     buffer_offset_ = buffer_offset;
315     error_ = {};
316   }
317 
318   void Reset(base::Vector<const uint8_t> bytes, uint32_t buffer_offset = 0) {
319     Reset(bytes.begin(), bytes.end(), buffer_offset);
320   }
321 
ok()322   bool ok() const { return error_.empty(); }
failed()323   bool failed() const { return !ok(); }
more()324   bool more() const { return pc_ < end_; }
error()325   const WasmError& error() const { return error_; }
326 
start()327   const byte* start() const { return start_; }
pc()328   const byte* pc() const { return pc_; }
position()329   uint32_t V8_INLINE position() const {
330     return static_cast<uint32_t>(pc_ - start_);
331   }
332   // This needs to be inlined for performance (see https://crbug.com/910432).
pc_offset(const uint8_t * pc)333   uint32_t V8_INLINE pc_offset(const uint8_t* pc) const {
334     DCHECK_LE(start_, pc);
335     DCHECK_GE(kMaxUInt32 - buffer_offset_, pc - start_);
336     return static_cast<uint32_t>(pc - start_) + buffer_offset_;
337   }
pc_offset()338   uint32_t pc_offset() const { return pc_offset(pc_); }
buffer_offset()339   uint32_t buffer_offset() const { return buffer_offset_; }
340   // Takes an offset relative to the module start and returns an offset relative
341   // to the current buffer of the decoder.
GetBufferRelativeOffset(uint32_t offset)342   uint32_t GetBufferRelativeOffset(uint32_t offset) const {
343     DCHECK_LE(buffer_offset_, offset);
344     return offset - buffer_offset_;
345   }
end()346   const byte* end() const { return end_; }
set_end(const byte * end)347   void set_end(const byte* end) { end_ = end; }
348 
349   // Check if the byte at {offset} from the current pc equals {expected}.
lookahead(int offset,byte expected)350   bool lookahead(int offset, byte expected) {
351     DCHECK_LE(pc_, end_);
352     return end_ - pc_ > offset && pc_[offset] == expected;
353   }
354 
355  protected:
356   const byte* start_;
357   const byte* pc_;
358   const byte* end_;
359   // The offset of the current buffer in the module. Needed for streaming.
360   uint32_t buffer_offset_;
361   WasmError error_;
362 
363  private:
verrorf(uint32_t offset,const char * format,va_list args)364   void verrorf(uint32_t offset, const char* format, va_list args) {
365     // Only report the first error.
366     if (!ok()) return;
367     constexpr int kMaxErrorMsg = 256;
368     base::EmbeddedVector<char, kMaxErrorMsg> buffer;
369     int len = base::VSNPrintF(buffer, format, args);
370     CHECK_LT(0, len);
371     error_ = {offset, {buffer.begin(), static_cast<size_t>(len)}};
372     onFirstError();
373   }
374 
375   template <typename IntType, ValidateFlag validate>
read_little_endian(const byte * pc,const char * msg)376   IntType read_little_endian(const byte* pc, const char* msg) {
377     if (!validate) {
378       DCHECK(validate_size(pc, sizeof(IntType), msg));
379     } else if (!validate_size(pc, sizeof(IntType), msg)) {
380       return IntType{0};
381     }
382     return base::ReadLittleEndianValue<IntType>(reinterpret_cast<Address>(pc));
383   }
384 
385   template <typename IntType>
consume_little_endian(const char * name)386   IntType consume_little_endian(const char* name) {
387     TRACE("  +%u  %-20s: ", pc_offset(), name);
388     if (!checkAvailable(sizeof(IntType))) {
389       traceOffEnd();
390       pc_ = end_;
391       return IntType{0};
392     }
393     IntType val = read_little_endian<IntType, kNoValidation>(pc_, name);
394     traceByteRange(pc_, pc_ + sizeof(IntType));
395     TRACE("= %d\n", val);
396     pc_ += sizeof(IntType);
397     return val;
398   }
399 
400   template <typename IntType, ValidateFlag validate, TraceFlag trace,
401             size_t size_in_bits = 8 * sizeof(IntType)>
402   V8_INLINE IntType read_leb(const byte* pc, uint32_t* length,
403                              const char* name = "varint") {
404     static_assert(size_in_bits <= 8 * sizeof(IntType),
405                   "leb does not fit in type");
406     TRACE_IF(trace, "  +%u  %-20s: ", pc_offset(), name);
407     // Fast path for single-byte integers.
408     if ((!validate || V8_LIKELY(pc < end_)) && !(*pc & 0x80)) {
409       TRACE_IF(trace, "%02x ", *pc);
410       *length = 1;
411       IntType result = *pc;
412       if (std::is_signed<IntType>::value) {
413         // Perform sign extension.
414         constexpr int sign_ext_shift = int{8 * sizeof(IntType)} - 7;
415         result = (result << sign_ext_shift) >> sign_ext_shift;
416         TRACE_IF(trace, "= %" PRIi64 "\n", static_cast<int64_t>(result));
417       } else {
418         TRACE_IF(trace, "= %" PRIu64 "\n", static_cast<uint64_t>(result));
419       }
420       return result;
421     }
422     return read_leb_slowpath<IntType, validate, trace, size_in_bits>(pc, length,
423                                                                      name);
424   }
425 
426   template <typename IntType, ValidateFlag validate, TraceFlag trace,
427             size_t size_in_bits = 8 * sizeof(IntType)>
read_leb_slowpath(const byte * pc,uint32_t * length,const char * name)428   V8_NOINLINE IntType read_leb_slowpath(const byte* pc, uint32_t* length,
429                                         const char* name) {
430     // Create an unrolled LEB decoding function per integer type.
431     return read_leb_tail<IntType, validate, trace, size_in_bits, 0>(pc, length,
432                                                                     name, 0);
433   }
434 
435   template <typename IntType, ValidateFlag validate, TraceFlag trace,
436             size_t size_in_bits, int byte_index>
read_leb_tail(const byte * pc,uint32_t * length,const char * name,IntType result)437   V8_INLINE IntType read_leb_tail(const byte* pc, uint32_t* length,
438                                   const char* name, IntType result) {
439     constexpr bool is_signed = std::is_signed<IntType>::value;
440     constexpr int kMaxLength = (size_in_bits + 6) / 7;
441     static_assert(byte_index < kMaxLength, "invalid template instantiation");
442     constexpr int shift = byte_index * 7;
443     constexpr bool is_last_byte = byte_index == kMaxLength - 1;
444     const bool at_end = validate && pc >= end_;
445     byte b = 0;
446     if (V8_LIKELY(!at_end)) {
447       DCHECK_LT(pc, end_);
448       b = *pc;
449       TRACE_IF(trace, "%02x ", b);
450       using Unsigned = typename std::make_unsigned<IntType>::type;
451       result = result |
452                (static_cast<Unsigned>(static_cast<IntType>(b) & 0x7f) << shift);
453     }
454     if (!is_last_byte && (b & 0x80)) {
455       // Make sure that we only instantiate the template for valid byte indexes.
456       // Compilers are not smart enough to figure out statically that the
457       // following call is unreachable if is_last_byte is false.
458       constexpr int next_byte_index = byte_index + (is_last_byte ? 0 : 1);
459       return read_leb_tail<IntType, validate, trace, size_in_bits,
460                            next_byte_index>(pc + 1, length, name, result);
461     }
462     *length = byte_index + (at_end ? 0 : 1);
463     if (validate && V8_UNLIKELY(at_end || (b & 0x80))) {
464       TRACE_IF(trace, at_end ? "<end> " : "<length overflow> ");
465       if (validate == kFullValidation) {
466         errorf(pc, "expected %s", name);
467       } else {
468         MarkError();
469       }
470       result = 0;
471       *length = 0;
472     }
473     if (is_last_byte) {
474       // A signed-LEB128 must sign-extend the final byte, excluding its
475       // most-significant bit; e.g. for a 32-bit LEB128:
476       //   kExtraBits = 4  (== 32 - (5-1) * 7)
477       // For unsigned values, the extra bits must be all zero.
478       // For signed values, the extra bits *plus* the most significant bit must
479       // either be 0, or all ones.
480       constexpr int kExtraBits = size_in_bits - ((kMaxLength - 1) * 7);
481       constexpr int kSignExtBits = kExtraBits - (is_signed ? 1 : 0);
482       const byte checked_bits = b & (0xFF << kSignExtBits);
483       constexpr byte kSignExtendedExtraBits = 0x7f & (0xFF << kSignExtBits);
484       const bool valid_extra_bits =
485           checked_bits == 0 ||
486           (is_signed && checked_bits == kSignExtendedExtraBits);
487       if (!validate) {
488         DCHECK(valid_extra_bits);
489       } else if (V8_UNLIKELY(!valid_extra_bits)) {
490         if (validate == kFullValidation) {
491           error(pc, "extra bits in varint");
492         } else {
493           MarkError();
494         }
495         result = 0;
496         *length = 0;
497       }
498     }
499     constexpr int sign_ext_shift =
500         is_signed ? std::max(0, int{8 * sizeof(IntType)} - shift - 7) : 0;
501     // Perform sign extension.
502     result = (result << sign_ext_shift) >> sign_ext_shift;
503     if (trace && is_signed) {
504       TRACE("= %" PRIi64 "\n", static_cast<int64_t>(result));
505     } else if (trace) {
506       TRACE("= %" PRIu64 "\n", static_cast<uint64_t>(result));
507     }
508     return result;
509   }
510 };
511 
512 #undef TRACE
513 }  // namespace wasm
514 }  // namespace internal
515 }  // namespace v8
516 
517 #endif  // V8_WASM_DECODER_H_
518