1 // Copyright 2015 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #if !V8_ENABLE_WEBASSEMBLY 6 #error This header should only be included if WebAssembly is enabled. 7 #endif // !V8_ENABLE_WEBASSEMBLY 8 9 #ifndef V8_WASM_DECODER_H_ 10 #define V8_WASM_DECODER_H_ 11 12 #include <cinttypes> 13 #include <cstdarg> 14 #include <memory> 15 16 #include "src/base/compiler-specific.h" 17 #include "src/base/memory.h" 18 #include "src/base/strings.h" 19 #include "src/base/vector.h" 20 #include "src/codegen/signature.h" 21 #include "src/flags/flags.h" 22 #include "src/wasm/wasm-opcodes.h" 23 #include "src/wasm/wasm-result.h" 24 #include "src/zone/zone-containers.h" 25 26 namespace v8 { 27 namespace internal { 28 namespace wasm { 29 30 #define TRACE(...) \ 31 do { \ 32 if (FLAG_trace_wasm_decoder) PrintF(__VA_ARGS__); \ 33 } while (false) 34 #define TRACE_IF(cond, ...) \ 35 do { \ 36 if (FLAG_trace_wasm_decoder && (cond)) PrintF(__VA_ARGS__); \ 37 } while (false) 38 39 // A {DecodeResult} only stores the failure / success status, but no data. 40 using DecodeResult = VoidResult; 41 42 // A helper utility to decode bytes, integers, fields, varints, etc, from 43 // a buffer of bytes. 44 class Decoder { 45 public: 46 // {ValidateFlag} can be used in a boolean manner ({if (!validate) ...}). 47 enum ValidateFlag : int8_t { 48 kNoValidation = 0, // Don't run validation, assume valid input. 49 kBooleanValidation, // Run validation but only store a generic error. 50 kFullValidation // Run full validation with error message and location. 51 }; 52 53 enum TraceFlag : bool { kTrace = true, kNoTrace = false }; 54 55 Decoder(const byte* start, const byte* end, uint32_t buffer_offset = 0) Decoder(start,start,end,buffer_offset)56 : Decoder(start, start, end, buffer_offset) {} 57 explicit Decoder(const base::Vector<const byte> bytes, 58 uint32_t buffer_offset = 0) 59 : Decoder(bytes.begin(), bytes.begin() + bytes.length(), buffer_offset) {} 60 Decoder(const byte* start, const byte* pc, const byte* end, 61 uint32_t buffer_offset = 0) start_(start)62 : start_(start), pc_(pc), end_(end), buffer_offset_(buffer_offset) { 63 DCHECK_LE(start, pc); 64 DCHECK_LE(pc, end); 65 DCHECK_EQ(static_cast<uint32_t>(end - start), end - start); 66 } 67 68 virtual ~Decoder() = default; 69 70 // Ensures there are at least {length} bytes left to read, starting at {pc}. validate_size(const byte * pc,uint32_t length,const char * msg)71 bool validate_size(const byte* pc, uint32_t length, const char* msg) { 72 DCHECK_LE(start_, pc); 73 if (V8_UNLIKELY(pc > end_ || length > static_cast<uint32_t>(end_ - pc))) { 74 error(pc, msg); 75 return false; 76 } 77 return true; 78 } 79 80 // Reads an 8-bit unsigned integer. 81 template <ValidateFlag validate> 82 uint8_t read_u8(const byte* pc, const char* msg = "expected 1 byte") { 83 return read_little_endian<uint8_t, validate>(pc, msg); 84 } 85 86 // Reads a 16-bit unsigned integer (little endian). 87 template <ValidateFlag validate> 88 uint16_t read_u16(const byte* pc, const char* msg = "expected 2 bytes") { 89 return read_little_endian<uint16_t, validate>(pc, msg); 90 } 91 92 // Reads a 32-bit unsigned integer (little endian). 93 template <ValidateFlag validate> 94 uint32_t read_u32(const byte* pc, const char* msg = "expected 4 bytes") { 95 return read_little_endian<uint32_t, validate>(pc, msg); 96 } 97 98 // Reads a 64-bit unsigned integer (little endian). 99 template <ValidateFlag validate> 100 uint64_t read_u64(const byte* pc, const char* msg = "expected 8 bytes") { 101 return read_little_endian<uint64_t, validate>(pc, msg); 102 } 103 104 // Reads a variable-length unsigned integer (little endian). 105 template <ValidateFlag validate> 106 uint32_t read_u32v(const byte* pc, uint32_t* length, 107 const char* name = "LEB32") { 108 return read_leb<uint32_t, validate, kNoTrace>(pc, length, name); 109 } 110 111 // Reads a variable-length signed integer (little endian). 112 template <ValidateFlag validate> 113 int32_t read_i32v(const byte* pc, uint32_t* length, 114 const char* name = "signed LEB32") { 115 return read_leb<int32_t, validate, kNoTrace>(pc, length, name); 116 } 117 118 // Reads a variable-length unsigned integer (little endian). 119 template <ValidateFlag validate> 120 uint64_t read_u64v(const byte* pc, uint32_t* length, 121 const char* name = "LEB64") { 122 return read_leb<uint64_t, validate, kNoTrace>(pc, length, name); 123 } 124 125 // Reads a variable-length signed integer (little endian). 126 template <ValidateFlag validate> 127 int64_t read_i64v(const byte* pc, uint32_t* length, 128 const char* name = "signed LEB64") { 129 return read_leb<int64_t, validate, kNoTrace>(pc, length, name); 130 } 131 132 // Reads a variable-length 33-bit signed integer (little endian). 133 template <ValidateFlag validate> 134 int64_t read_i33v(const byte* pc, uint32_t* length, 135 const char* name = "signed LEB33") { 136 return read_leb<int64_t, validate, kNoTrace, 33>(pc, length, name); 137 } 138 139 // Convenient overload for callers who don't care about length. 140 template <ValidateFlag validate> read_prefixed_opcode(const byte * pc)141 WasmOpcode read_prefixed_opcode(const byte* pc) { 142 uint32_t len; 143 return read_prefixed_opcode<validate>(pc, &len); 144 } 145 146 // Reads a prefixed-opcode, possibly with variable-length index. 147 // `length` is set to the number of bytes that make up this opcode, 148 // *including* the prefix byte. For most opcodes, it will be 2. 149 template <ValidateFlag validate> 150 WasmOpcode read_prefixed_opcode(const byte* pc, uint32_t* length, 151 const char* name = "prefixed opcode") { 152 uint32_t index; 153 154 // Prefixed opcodes all use LEB128 encoding. 155 index = read_u32v<validate>(pc + 1, length, "prefixed opcode index"); 156 *length += 1; // Prefix byte. 157 // Only support opcodes that go up to 0xFF (when decoded). Anything 158 // bigger will need 1 more byte, and the '<< 8' below will be wrong. 159 if (validate && V8_UNLIKELY(index > 0xff)) { 160 errorf(pc, "Invalid prefixed opcode %d", index); 161 // If size validation fails. 162 index = 0; 163 *length = 0; 164 } 165 166 return static_cast<WasmOpcode>((*pc) << 8 | index); 167 } 168 169 // Reads a 8-bit unsigned integer (byte) and advances {pc_}. 170 uint8_t consume_u8(const char* name = "uint8_t") { 171 return consume_little_endian<uint8_t>(name); 172 } 173 174 // Reads a 16-bit unsigned integer (little endian) and advances {pc_}. 175 uint16_t consume_u16(const char* name = "uint16_t") { 176 return consume_little_endian<uint16_t>(name); 177 } 178 179 // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}. 180 uint32_t consume_u32(const char* name = "uint32_t") { 181 return consume_little_endian<uint32_t>(name); 182 } 183 184 // Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}. 185 uint32_t consume_u32v(const char* name = nullptr) { 186 uint32_t length = 0; 187 uint32_t result = 188 read_leb<uint32_t, kFullValidation, kTrace>(pc_, &length, name); 189 pc_ += length; 190 return result; 191 } 192 193 // Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}. 194 int32_t consume_i32v(const char* name = nullptr) { 195 uint32_t length = 0; 196 int32_t result = 197 read_leb<int32_t, kFullValidation, kTrace>(pc_, &length, name); 198 pc_ += length; 199 return result; 200 } 201 202 // Reads a LEB128 variable-length unsigned 64-bit integer and advances {pc_}. 203 uint64_t consume_u64v(const char* name = nullptr) { 204 uint32_t length = 0; 205 uint64_t result = 206 read_leb<uint64_t, kFullValidation, kTrace>(pc_, &length, name); 207 pc_ += length; 208 return result; 209 } 210 211 // Reads a LEB128 variable-length signed 64-bit integer and advances {pc_}. 212 int64_t consume_i64v(const char* name = nullptr) { 213 uint32_t length = 0; 214 int64_t result = 215 read_leb<int64_t, kFullValidation, kTrace>(pc_, &length, name); 216 pc_ += length; 217 return result; 218 } 219 220 // Consume {size} bytes and send them to the bit bucket, advancing {pc_}. 221 void consume_bytes(uint32_t size, const char* name = "skip") { 222 // Only trace if the name is not null. 223 TRACE_IF(name, " +%u %-20s: %u bytes\n", pc_offset(), name, size); 224 if (checkAvailable(size)) { 225 pc_ += size; 226 } else { 227 pc_ = end_; 228 } 229 } 230 231 // Check that at least {size} bytes exist between {pc_} and {end_}. checkAvailable(uint32_t size)232 bool checkAvailable(uint32_t size) { 233 DCHECK_LE(pc_, end_); 234 if (V8_UNLIKELY(size > static_cast<uint32_t>(end_ - pc_))) { 235 errorf(pc_, "expected %u bytes, fell off end", size); 236 return false; 237 } 238 return true; 239 } 240 241 // Use this for "boolean validation", i.e. if the error message is not used 242 // anyway. MarkError()243 void V8_NOINLINE MarkError() { 244 if (!ok()) return; 245 error_ = {0, "validation failed"}; 246 onFirstError(); 247 } 248 249 // Do not inline error methods. This has measurable impact on validation time, 250 // see https://crbug.com/910432. error(const char * msg)251 void V8_NOINLINE error(const char* msg) { errorf(pc_offset(), "%s", msg); } error(const uint8_t * pc,const char * msg)252 void V8_NOINLINE error(const uint8_t* pc, const char* msg) { 253 errorf(pc_offset(pc), "%s", msg); 254 } error(uint32_t offset,const char * msg)255 void V8_NOINLINE error(uint32_t offset, const char* msg) { 256 errorf(offset, "%s", msg); 257 } 258 errorf(const char * format,...)259 void V8_NOINLINE PRINTF_FORMAT(2, 3) errorf(const char* format, ...) { 260 va_list args; 261 va_start(args, format); 262 verrorf(pc_offset(), format, args); 263 va_end(args); 264 } 265 266 void V8_NOINLINE PRINTF_FORMAT(3, 4) errorf(uint32_t offset,const char * format,...)267 errorf(uint32_t offset, const char* format, ...) { 268 va_list args; 269 va_start(args, format); 270 verrorf(offset, format, args); 271 va_end(args); 272 } 273 274 void V8_NOINLINE PRINTF_FORMAT(3, 4) errorf(const uint8_t * pc,const char * format,...)275 errorf(const uint8_t* pc, const char* format, ...) { 276 va_list args; 277 va_start(args, format); 278 verrorf(pc_offset(pc), format, args); 279 va_end(args); 280 } 281 282 // Behavior triggered on first error, overridden in subclasses. onFirstError()283 virtual void onFirstError() {} 284 285 // Debugging helper to print a bytes range as hex bytes. traceByteRange(const byte * start,const byte * end)286 void traceByteRange(const byte* start, const byte* end) { 287 DCHECK_LE(start, end); 288 for (const byte* p = start; p < end; ++p) TRACE("%02x ", *p); 289 } 290 291 // Debugging helper to print bytes up to the end. traceOffEnd()292 void traceOffEnd() { 293 traceByteRange(pc_, end_); 294 TRACE("<end>\n"); 295 } 296 297 // Converts the given value to a {Result}, copying the error if necessary. 298 template <typename T, typename U = typename std::remove_reference<T>::type> toResult(T && val)299 Result<U> toResult(T&& val) { 300 if (failed()) { 301 TRACE("Result error: %s\n", error_.message().c_str()); 302 return Result<U>{error_}; 303 } 304 return Result<U>{std::forward<T>(val)}; 305 } 306 307 // Resets the boundaries of this decoder. 308 void Reset(const byte* start, const byte* end, uint32_t buffer_offset = 0) { 309 DCHECK_LE(start, end); 310 DCHECK_EQ(static_cast<uint32_t>(end - start), end - start); 311 start_ = start; 312 pc_ = start; 313 end_ = end; 314 buffer_offset_ = buffer_offset; 315 error_ = {}; 316 } 317 318 void Reset(base::Vector<const uint8_t> bytes, uint32_t buffer_offset = 0) { 319 Reset(bytes.begin(), bytes.end(), buffer_offset); 320 } 321 ok()322 bool ok() const { return error_.empty(); } failed()323 bool failed() const { return !ok(); } more()324 bool more() const { return pc_ < end_; } error()325 const WasmError& error() const { return error_; } 326 start()327 const byte* start() const { return start_; } pc()328 const byte* pc() const { return pc_; } position()329 uint32_t V8_INLINE position() const { 330 return static_cast<uint32_t>(pc_ - start_); 331 } 332 // This needs to be inlined for performance (see https://crbug.com/910432). pc_offset(const uint8_t * pc)333 uint32_t V8_INLINE pc_offset(const uint8_t* pc) const { 334 DCHECK_LE(start_, pc); 335 DCHECK_GE(kMaxUInt32 - buffer_offset_, pc - start_); 336 return static_cast<uint32_t>(pc - start_) + buffer_offset_; 337 } pc_offset()338 uint32_t pc_offset() const { return pc_offset(pc_); } buffer_offset()339 uint32_t buffer_offset() const { return buffer_offset_; } 340 // Takes an offset relative to the module start and returns an offset relative 341 // to the current buffer of the decoder. GetBufferRelativeOffset(uint32_t offset)342 uint32_t GetBufferRelativeOffset(uint32_t offset) const { 343 DCHECK_LE(buffer_offset_, offset); 344 return offset - buffer_offset_; 345 } end()346 const byte* end() const { return end_; } set_end(const byte * end)347 void set_end(const byte* end) { end_ = end; } 348 349 // Check if the byte at {offset} from the current pc equals {expected}. lookahead(int offset,byte expected)350 bool lookahead(int offset, byte expected) { 351 DCHECK_LE(pc_, end_); 352 return end_ - pc_ > offset && pc_[offset] == expected; 353 } 354 355 protected: 356 const byte* start_; 357 const byte* pc_; 358 const byte* end_; 359 // The offset of the current buffer in the module. Needed for streaming. 360 uint32_t buffer_offset_; 361 WasmError error_; 362 363 private: verrorf(uint32_t offset,const char * format,va_list args)364 void verrorf(uint32_t offset, const char* format, va_list args) { 365 // Only report the first error. 366 if (!ok()) return; 367 constexpr int kMaxErrorMsg = 256; 368 base::EmbeddedVector<char, kMaxErrorMsg> buffer; 369 int len = base::VSNPrintF(buffer, format, args); 370 CHECK_LT(0, len); 371 error_ = {offset, {buffer.begin(), static_cast<size_t>(len)}}; 372 onFirstError(); 373 } 374 375 template <typename IntType, ValidateFlag validate> read_little_endian(const byte * pc,const char * msg)376 IntType read_little_endian(const byte* pc, const char* msg) { 377 if (!validate) { 378 DCHECK(validate_size(pc, sizeof(IntType), msg)); 379 } else if (!validate_size(pc, sizeof(IntType), msg)) { 380 return IntType{0}; 381 } 382 return base::ReadLittleEndianValue<IntType>(reinterpret_cast<Address>(pc)); 383 } 384 385 template <typename IntType> consume_little_endian(const char * name)386 IntType consume_little_endian(const char* name) { 387 TRACE(" +%u %-20s: ", pc_offset(), name); 388 if (!checkAvailable(sizeof(IntType))) { 389 traceOffEnd(); 390 pc_ = end_; 391 return IntType{0}; 392 } 393 IntType val = read_little_endian<IntType, kNoValidation>(pc_, name); 394 traceByteRange(pc_, pc_ + sizeof(IntType)); 395 TRACE("= %d\n", val); 396 pc_ += sizeof(IntType); 397 return val; 398 } 399 400 template <typename IntType, ValidateFlag validate, TraceFlag trace, 401 size_t size_in_bits = 8 * sizeof(IntType)> 402 V8_INLINE IntType read_leb(const byte* pc, uint32_t* length, 403 const char* name = "varint") { 404 static_assert(size_in_bits <= 8 * sizeof(IntType), 405 "leb does not fit in type"); 406 TRACE_IF(trace, " +%u %-20s: ", pc_offset(), name); 407 // Fast path for single-byte integers. 408 if ((!validate || V8_LIKELY(pc < end_)) && !(*pc & 0x80)) { 409 TRACE_IF(trace, "%02x ", *pc); 410 *length = 1; 411 IntType result = *pc; 412 if (std::is_signed<IntType>::value) { 413 // Perform sign extension. 414 constexpr int sign_ext_shift = int{8 * sizeof(IntType)} - 7; 415 result = (result << sign_ext_shift) >> sign_ext_shift; 416 TRACE_IF(trace, "= %" PRIi64 "\n", static_cast<int64_t>(result)); 417 } else { 418 TRACE_IF(trace, "= %" PRIu64 "\n", static_cast<uint64_t>(result)); 419 } 420 return result; 421 } 422 return read_leb_slowpath<IntType, validate, trace, size_in_bits>(pc, length, 423 name); 424 } 425 426 template <typename IntType, ValidateFlag validate, TraceFlag trace, 427 size_t size_in_bits = 8 * sizeof(IntType)> read_leb_slowpath(const byte * pc,uint32_t * length,const char * name)428 V8_NOINLINE IntType read_leb_slowpath(const byte* pc, uint32_t* length, 429 const char* name) { 430 // Create an unrolled LEB decoding function per integer type. 431 return read_leb_tail<IntType, validate, trace, size_in_bits, 0>(pc, length, 432 name, 0); 433 } 434 435 template <typename IntType, ValidateFlag validate, TraceFlag trace, 436 size_t size_in_bits, int byte_index> read_leb_tail(const byte * pc,uint32_t * length,const char * name,IntType result)437 V8_INLINE IntType read_leb_tail(const byte* pc, uint32_t* length, 438 const char* name, IntType result) { 439 constexpr bool is_signed = std::is_signed<IntType>::value; 440 constexpr int kMaxLength = (size_in_bits + 6) / 7; 441 static_assert(byte_index < kMaxLength, "invalid template instantiation"); 442 constexpr int shift = byte_index * 7; 443 constexpr bool is_last_byte = byte_index == kMaxLength - 1; 444 const bool at_end = validate && pc >= end_; 445 byte b = 0; 446 if (V8_LIKELY(!at_end)) { 447 DCHECK_LT(pc, end_); 448 b = *pc; 449 TRACE_IF(trace, "%02x ", b); 450 using Unsigned = typename std::make_unsigned<IntType>::type; 451 result = result | 452 (static_cast<Unsigned>(static_cast<IntType>(b) & 0x7f) << shift); 453 } 454 if (!is_last_byte && (b & 0x80)) { 455 // Make sure that we only instantiate the template for valid byte indexes. 456 // Compilers are not smart enough to figure out statically that the 457 // following call is unreachable if is_last_byte is false. 458 constexpr int next_byte_index = byte_index + (is_last_byte ? 0 : 1); 459 return read_leb_tail<IntType, validate, trace, size_in_bits, 460 next_byte_index>(pc + 1, length, name, result); 461 } 462 *length = byte_index + (at_end ? 0 : 1); 463 if (validate && V8_UNLIKELY(at_end || (b & 0x80))) { 464 TRACE_IF(trace, at_end ? "<end> " : "<length overflow> "); 465 if (validate == kFullValidation) { 466 errorf(pc, "expected %s", name); 467 } else { 468 MarkError(); 469 } 470 result = 0; 471 *length = 0; 472 } 473 if (is_last_byte) { 474 // A signed-LEB128 must sign-extend the final byte, excluding its 475 // most-significant bit; e.g. for a 32-bit LEB128: 476 // kExtraBits = 4 (== 32 - (5-1) * 7) 477 // For unsigned values, the extra bits must be all zero. 478 // For signed values, the extra bits *plus* the most significant bit must 479 // either be 0, or all ones. 480 constexpr int kExtraBits = size_in_bits - ((kMaxLength - 1) * 7); 481 constexpr int kSignExtBits = kExtraBits - (is_signed ? 1 : 0); 482 const byte checked_bits = b & (0xFF << kSignExtBits); 483 constexpr byte kSignExtendedExtraBits = 0x7f & (0xFF << kSignExtBits); 484 const bool valid_extra_bits = 485 checked_bits == 0 || 486 (is_signed && checked_bits == kSignExtendedExtraBits); 487 if (!validate) { 488 DCHECK(valid_extra_bits); 489 } else if (V8_UNLIKELY(!valid_extra_bits)) { 490 if (validate == kFullValidation) { 491 error(pc, "extra bits in varint"); 492 } else { 493 MarkError(); 494 } 495 result = 0; 496 *length = 0; 497 } 498 } 499 constexpr int sign_ext_shift = 500 is_signed ? std::max(0, int{8 * sizeof(IntType)} - shift - 7) : 0; 501 // Perform sign extension. 502 result = (result << sign_ext_shift) >> sign_ext_shift; 503 if (trace && is_signed) { 504 TRACE("= %" PRIi64 "\n", static_cast<int64_t>(result)); 505 } else if (trace) { 506 TRACE("= %" PRIu64 "\n", static_cast<uint64_t>(result)); 507 } 508 return result; 509 } 510 }; 511 512 #undef TRACE 513 } // namespace wasm 514 } // namespace internal 515 } // namespace v8 516 517 #endif // V8_WASM_DECODER_H_ 518