1 // Copyright 2015 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_WASM_DECODER_H_ 6 #define V8_WASM_DECODER_H_ 7 8 #include "src/base/compiler-specific.h" 9 #include "src/base/smart-pointers.h" 10 #include "src/flags.h" 11 #include "src/signature.h" 12 #include "src/utils.h" 13 #include "src/wasm/wasm-result.h" 14 #include "src/zone-containers.h" 15 16 namespace v8 { 17 namespace internal { 18 namespace wasm { 19 20 #if DEBUG 21 #define TRACE(...) \ 22 do { \ 23 if (FLAG_trace_wasm_decoder) PrintF(__VA_ARGS__); \ 24 } while (false) 25 #else 26 #define TRACE(...) 27 #endif 28 29 #if !(V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_ARM) 30 #define UNALIGNED_ACCESS_OK 1 31 #else 32 #define UNALIGNED_ACCESS_OK 0 33 #endif 34 35 // A helper utility to decode bytes, integers, fields, varints, etc, from 36 // a buffer of bytes. 37 class Decoder { 38 public: Decoder(const byte * start,const byte * end)39 Decoder(const byte* start, const byte* end) 40 : start_(start), 41 pc_(start), 42 limit_(end), 43 end_(end), 44 error_pc_(nullptr), 45 error_pt_(nullptr) {} 46 ~Decoder()47 virtual ~Decoder() {} 48 check(const byte * base,unsigned offset,unsigned length,const char * msg)49 inline bool check(const byte* base, unsigned offset, unsigned length, 50 const char* msg) { 51 DCHECK_GE(base, start_); 52 if ((base + offset + length) > limit_) { 53 error(base, base + offset, "%s", msg); 54 return false; 55 } 56 return true; 57 } 58 59 // Reads a single 8-bit byte, reporting an error if out of bounds. 60 inline uint8_t checked_read_u8(const byte* base, unsigned offset, 61 const char* msg = "expected 1 byte") { 62 return check(base, offset, 1, msg) ? base[offset] : 0; 63 } 64 65 // Reads 16-bit word, reporting an error if out of bounds. 66 inline uint16_t checked_read_u16(const byte* base, unsigned offset, 67 const char* msg = "expected 2 bytes") { 68 return check(base, offset, 2, msg) ? read_u16(base + offset) : 0; 69 } 70 71 // Reads 32-bit word, reporting an error if out of bounds. 72 inline uint32_t checked_read_u32(const byte* base, unsigned offset, 73 const char* msg = "expected 4 bytes") { 74 return check(base, offset, 4, msg) ? read_u32(base + offset) : 0; 75 } 76 77 // Reads 64-bit word, reporting an error if out of bounds. 78 inline uint64_t checked_read_u64(const byte* base, unsigned offset, 79 const char* msg = "expected 8 bytes") { 80 return check(base, offset, 8, msg) ? read_u64(base + offset) : 0; 81 } 82 83 // Reads a variable-length unsigned integer (little endian). 84 uint32_t checked_read_u32v(const byte* base, unsigned offset, 85 unsigned* length, 86 const char* msg = "expected LEB32") { 87 return checked_read_leb<uint32_t, false>(base, offset, length, msg); 88 } 89 90 // Reads a variable-length signed integer (little endian). 91 int32_t checked_read_i32v(const byte* base, unsigned offset, unsigned* length, 92 const char* msg = "expected SLEB32") { 93 uint32_t result = 94 checked_read_leb<uint32_t, true>(base, offset, length, msg); 95 if (*length == 5) return bit_cast<int32_t>(result); 96 if (*length > 0) { 97 int shift = 32 - 7 * *length; 98 // Perform sign extension. 99 return bit_cast<int32_t>(result << shift) >> shift; 100 } 101 return 0; 102 } 103 104 // Reads a variable-length unsigned integer (little endian). 105 uint64_t checked_read_u64v(const byte* base, unsigned offset, 106 unsigned* length, 107 const char* msg = "expected LEB64") { 108 return checked_read_leb<uint64_t, false>(base, offset, length, msg); 109 } 110 111 // Reads a variable-length signed integer (little endian). 112 int64_t checked_read_i64v(const byte* base, unsigned offset, unsigned* length, 113 const char* msg = "expected SLEB64") { 114 uint64_t result = 115 checked_read_leb<uint64_t, true>(base, offset, length, msg); 116 if (*length == 10) return bit_cast<int64_t>(result); 117 if (*length > 0) { 118 int shift = 64 - 7 * *length; 119 // Perform sign extension. 120 return bit_cast<int64_t>(result << shift) >> shift; 121 } 122 return 0; 123 } 124 125 // Reads a single 16-bit unsigned integer (little endian). read_u16(const byte * ptr)126 inline uint16_t read_u16(const byte* ptr) { 127 DCHECK(ptr >= start_ && (ptr + 2) <= end_); 128 #if V8_TARGET_LITTLE_ENDIAN && UNALIGNED_ACCESS_OK 129 return *reinterpret_cast<const uint16_t*>(ptr); 130 #else 131 uint16_t b0 = ptr[0]; 132 uint16_t b1 = ptr[1]; 133 return (b1 << 8) | b0; 134 #endif 135 } 136 137 // Reads a single 32-bit unsigned integer (little endian). read_u32(const byte * ptr)138 inline uint32_t read_u32(const byte* ptr) { 139 DCHECK(ptr >= start_ && (ptr + 4) <= end_); 140 #if V8_TARGET_LITTLE_ENDIAN && UNALIGNED_ACCESS_OK 141 return *reinterpret_cast<const uint32_t*>(ptr); 142 #else 143 uint32_t b0 = ptr[0]; 144 uint32_t b1 = ptr[1]; 145 uint32_t b2 = ptr[2]; 146 uint32_t b3 = ptr[3]; 147 return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0; 148 #endif 149 } 150 151 // Reads a single 64-bit unsigned integer (little endian). read_u64(const byte * ptr)152 inline uint64_t read_u64(const byte* ptr) { 153 DCHECK(ptr >= start_ && (ptr + 8) <= end_); 154 #if V8_TARGET_LITTLE_ENDIAN && UNALIGNED_ACCESS_OK 155 return *reinterpret_cast<const uint64_t*>(ptr); 156 #else 157 uint32_t b0 = ptr[0]; 158 uint32_t b1 = ptr[1]; 159 uint32_t b2 = ptr[2]; 160 uint32_t b3 = ptr[3]; 161 uint32_t low = (b3 << 24) | (b2 << 16) | (b1 << 8) | b0; 162 uint32_t b4 = ptr[4]; 163 uint32_t b5 = ptr[5]; 164 uint32_t b6 = ptr[6]; 165 uint32_t b7 = ptr[7]; 166 uint64_t high = (b7 << 24) | (b6 << 16) | (b5 << 8) | b4; 167 return (high << 32) | low; 168 #endif 169 } 170 171 // Reads a 8-bit unsigned integer (byte) and advances {pc_}. 172 uint8_t consume_u8(const char* name = nullptr) { 173 TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_), 174 name ? name : "uint8_t"); 175 if (checkAvailable(1)) { 176 byte val = *(pc_++); 177 TRACE("%02x = %d\n", val, val); 178 return val; 179 } 180 return traceOffEnd<uint8_t>(); 181 } 182 183 // Reads a 16-bit unsigned integer (little endian) and advances {pc_}. 184 uint16_t consume_u16(const char* name = nullptr) { 185 TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_), 186 name ? name : "uint16_t"); 187 if (checkAvailable(2)) { 188 uint16_t val = read_u16(pc_); 189 TRACE("%02x %02x = %d\n", pc_[0], pc_[1], val); 190 pc_ += 2; 191 return val; 192 } 193 return traceOffEnd<uint16_t>(); 194 } 195 196 // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}. 197 uint32_t consume_u32(const char* name = nullptr) { 198 TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_), 199 name ? name : "uint32_t"); 200 if (checkAvailable(4)) { 201 uint32_t val = read_u32(pc_); 202 TRACE("%02x %02x %02x %02x = %u\n", pc_[0], pc_[1], pc_[2], pc_[3], val); 203 pc_ += 4; 204 return val; 205 } 206 return traceOffEnd<uint32_t>(); 207 } 208 209 // Reads a LEB128 variable-length 32-bit integer and advances {pc_}. 210 uint32_t consume_u32v(const char* name = nullptr) { 211 TRACE(" +%d %-20s: ", static_cast<int>(pc_ - start_), 212 name ? name : "varint"); 213 if (checkAvailable(1)) { 214 const byte* pos = pc_; 215 const byte* end = pc_ + 5; 216 if (end > limit_) end = limit_; 217 218 uint32_t result = 0; 219 int shift = 0; 220 byte b = 0; 221 while (pc_ < end) { 222 b = *pc_++; 223 TRACE("%02x ", b); 224 result = result | ((b & 0x7F) << shift); 225 if ((b & 0x80) == 0) break; 226 shift += 7; 227 } 228 229 int length = static_cast<int>(pc_ - pos); 230 if (pc_ == end && (b & 0x80)) { 231 error(pc_ - 1, "varint too large"); 232 } else if (length == 0) { 233 error(pc_, "varint of length 0"); 234 } else { 235 TRACE("= %u\n", result); 236 } 237 return result; 238 } 239 return traceOffEnd<uint32_t>(); 240 } 241 242 // Consume {size} bytes and send them to the bit bucket, advancing {pc_}. consume_bytes(int size)243 void consume_bytes(int size) { 244 if (checkAvailable(size)) { 245 pc_ += size; 246 } else { 247 pc_ = limit_; 248 } 249 } 250 251 // Check that at least {size} bytes exist between {pc_} and {limit_}. checkAvailable(int size)252 bool checkAvailable(int size) { 253 intptr_t pc_overflow_value = std::numeric_limits<intptr_t>::max() - size; 254 if (size < 0 || (intptr_t)pc_ > pc_overflow_value) { 255 error(pc_, nullptr, "reading %d bytes would underflow/overflow", size); 256 return false; 257 } else if (pc_ < start_ || limit_ < (pc_ + size)) { 258 error(pc_, nullptr, "expected %d bytes, fell off end", size); 259 return false; 260 } else { 261 return true; 262 } 263 } 264 error(const char * msg)265 void error(const char* msg) { error(pc_, nullptr, "%s", msg); } 266 error(const byte * pc,const char * msg)267 void error(const byte* pc, const char* msg) { error(pc, nullptr, "%s", msg); } 268 269 // Sets internal error state. 270 void PRINTF_FORMAT(4, 5) error(const byte * pc,const byte * pt,const char * format,...)271 error(const byte* pc, const byte* pt, const char* format, ...) { 272 if (ok()) { 273 #if DEBUG 274 if (FLAG_wasm_break_on_decoder_error) { 275 base::OS::DebugBreak(); 276 } 277 #endif 278 const int kMaxErrorMsg = 256; 279 char* buffer = new char[kMaxErrorMsg]; 280 va_list arguments; 281 va_start(arguments, format); 282 base::OS::VSNPrintF(buffer, kMaxErrorMsg - 1, format, arguments); 283 va_end(arguments); 284 error_msg_.Reset(buffer); 285 error_pc_ = pc; 286 error_pt_ = pt; 287 onFirstError(); 288 } 289 } 290 291 // Behavior triggered on first error, overridden in subclasses. onFirstError()292 virtual void onFirstError() {} 293 294 // Debugging helper to print bytes up to the end. 295 template <typename T> traceOffEnd()296 T traceOffEnd() { 297 T t = 0; 298 for (const byte* ptr = pc_; ptr < limit_; ptr++) { 299 TRACE("%02x ", *ptr); 300 } 301 TRACE("<end>\n"); 302 pc_ = limit_; 303 return t; 304 } 305 306 // Converts the given value to a {Result}, copying the error if necessary. 307 template <typename T> toResult(T val)308 Result<T> toResult(T val) { 309 Result<T> result; 310 if (error_pc_) { 311 TRACE("Result error: %s\n", error_msg_.get()); 312 result.error_code = kError; 313 result.start = start_; 314 result.error_pc = error_pc_; 315 result.error_pt = error_pt_; 316 // transfer ownership of the error to the result. 317 result.error_msg.Reset(error_msg_.Detach()); 318 } else { 319 result.error_code = kSuccess; 320 } 321 result.val = std::move(val); 322 return result; 323 } 324 325 // Resets the boundaries of this decoder. Reset(const byte * start,const byte * end)326 void Reset(const byte* start, const byte* end) { 327 start_ = start; 328 pc_ = start; 329 limit_ = end; 330 end_ = end; 331 error_pc_ = nullptr; 332 error_pt_ = nullptr; 333 error_msg_.Reset(nullptr); 334 } 335 ok()336 bool ok() const { return error_pc_ == nullptr; } failed()337 bool failed() const { return !error_msg_.is_empty(); } more()338 bool more() const { return pc_ < limit_; } 339 start()340 const byte* start() { return start_; } pc()341 const byte* pc() { return pc_; } pc_offset()342 uint32_t pc_offset() { return static_cast<uint32_t>(pc_ - start_); } 343 344 protected: 345 const byte* start_; 346 const byte* pc_; 347 const byte* limit_; 348 const byte* end_; 349 const byte* error_pc_; 350 const byte* error_pt_; 351 base::SmartArrayPointer<char> error_msg_; 352 353 private: 354 template <typename IntType, bool is_signed> checked_read_leb(const byte * base,unsigned offset,unsigned * length,const char * msg)355 IntType checked_read_leb(const byte* base, unsigned offset, unsigned* length, 356 const char* msg) { 357 if (!check(base, offset, 1, msg)) { 358 *length = 0; 359 return 0; 360 } 361 362 const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7; 363 const byte* ptr = base + offset; 364 const byte* end = ptr + kMaxLength; 365 if (end > limit_) end = limit_; 366 int shift = 0; 367 byte b = 0; 368 IntType result = 0; 369 while (ptr < end) { 370 b = *ptr++; 371 result = result | (static_cast<IntType>(b & 0x7F) << shift); 372 if ((b & 0x80) == 0) break; 373 shift += 7; 374 } 375 DCHECK_LE(ptr - (base + offset), kMaxLength); 376 *length = static_cast<unsigned>(ptr - (base + offset)); 377 if (ptr == end) { 378 // Check there are no bits set beyond the bitwidth of {IntType}. 379 const int kExtraBits = (1 + kMaxLength * 7) - (sizeof(IntType) * 8); 380 const byte kExtraBitsMask = 381 static_cast<byte>((0xFF << (8 - kExtraBits)) & 0xFF); 382 int extra_bits_value; 383 if (is_signed) { 384 // A signed-LEB128 must sign-extend the final byte, excluding its 385 // most-signifcant bit. e.g. for a 32-bit LEB128: 386 // kExtraBits = 4 387 // kExtraBitsMask = 0xf0 388 // If b is 0x0f, the value is negative, so extra_bits_value is 0x70. 389 // If b is 0x03, the value is positive, so extra_bits_value is 0x00. 390 extra_bits_value = (static_cast<int8_t>(b << kExtraBits) >> 8) & 391 kExtraBitsMask & ~0x80; 392 } else { 393 extra_bits_value = 0; 394 } 395 if (*length == kMaxLength && (b & kExtraBitsMask) != extra_bits_value) { 396 error(base, ptr, "extra bits in varint"); 397 return 0; 398 } 399 if ((b & 0x80) != 0) { 400 error(base, ptr, "%s", msg); 401 return 0; 402 } 403 } 404 return result; 405 } 406 }; 407 408 #undef TRACE 409 } // namespace wasm 410 } // namespace internal 411 } // namespace v8 412 413 #endif // V8_WASM_DECODER_H_ 414