1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Features shared by parsing and pre-parsing scanners. 6 7 #ifndef V8_PARSING_SCANNER_H_ 8 #define V8_PARSING_SCANNER_H_ 9 10 #include <algorithm> 11 #include <memory> 12 13 #include "include/v8.h" 14 #include "src/base/logging.h" 15 #include "src/common/globals.h" 16 #include "src/common/message-template.h" 17 #include "src/parsing/literal-buffer.h" 18 #include "src/parsing/parse-info.h" 19 #include "src/parsing/token.h" 20 #include "src/strings/char-predicates.h" 21 #include "src/strings/unicode.h" 22 #include "src/utils/allocation.h" 23 #include "src/utils/pointer-with-payload.h" 24 25 namespace v8 { 26 namespace internal { 27 28 class AstRawString; 29 class AstValueFactory; 30 class ExternalOneByteString; 31 class ExternalTwoByteString; 32 class ParserRecorder; 33 class RuntimeCallStats; 34 class Zone; 35 36 // --------------------------------------------------------------------- 37 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. 38 // A code unit is a 16 bit value representing either a 16 bit code point 39 // or one part of a surrogate pair that make a single 21 bit code point. 40 class Utf16CharacterStream { 41 public: 42 static constexpr uc32 kEndOfInput = static_cast<uc32>(-1); 43 44 virtual ~Utf16CharacterStream() = default; 45 set_parser_error()46 V8_INLINE void set_parser_error() { 47 buffer_cursor_ = buffer_end_; 48 has_parser_error_ = true; 49 } reset_parser_error_flag()50 V8_INLINE void reset_parser_error_flag() { has_parser_error_ = false; } has_parser_error()51 V8_INLINE bool has_parser_error() const { return has_parser_error_; } 52 Peek()53 inline uc32 Peek() { 54 if (V8_LIKELY(buffer_cursor_ < buffer_end_)) { 55 return static_cast<uc32>(*buffer_cursor_); 56 } else if (ReadBlockChecked()) { 57 return static_cast<uc32>(*buffer_cursor_); 58 } else { 59 return kEndOfInput; 60 } 61 } 62 63 // Returns and advances past the next UTF-16 code unit in the input 64 // stream. If there are no more code units it returns kEndOfInput. Advance()65 inline uc32 Advance() { 66 uc32 result = Peek(); 67 buffer_cursor_++; 68 return result; 69 } 70 71 // Returns and advances past the next UTF-16 code unit in the input stream 72 // that meets the checks requirement. If there are no more code units it 73 // returns kEndOfInput. 74 template <typename FunctionType> AdvanceUntil(FunctionType check)75 V8_INLINE uc32 AdvanceUntil(FunctionType check) { 76 while (true) { 77 auto next_cursor_pos = 78 std::find_if(buffer_cursor_, buffer_end_, [&check](uint16_t raw_c0_) { 79 uc32 c0_ = static_cast<uc32>(raw_c0_); 80 return check(c0_); 81 }); 82 83 if (next_cursor_pos == buffer_end_) { 84 buffer_cursor_ = buffer_end_; 85 if (!ReadBlockChecked()) { 86 buffer_cursor_++; 87 return kEndOfInput; 88 } 89 } else { 90 buffer_cursor_ = next_cursor_pos + 1; 91 return static_cast<uc32>(*next_cursor_pos); 92 } 93 } 94 } 95 96 // Go back one by one character in the input stream. 97 // This undoes the most recent Advance(). Back()98 inline void Back() { 99 // The common case - if the previous character is within 100 // buffer_start_ .. buffer_end_ will be handles locally. 101 // Otherwise, a new block is requested. 102 if (V8_LIKELY(buffer_cursor_ > buffer_start_)) { 103 buffer_cursor_--; 104 } else { 105 ReadBlockAt(pos() - 1); 106 } 107 } 108 pos()109 inline size_t pos() const { 110 return buffer_pos_ + (buffer_cursor_ - buffer_start_); 111 } 112 Seek(size_t pos)113 inline void Seek(size_t pos) { 114 if (V8_LIKELY(pos >= buffer_pos_ && 115 pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) { 116 buffer_cursor_ = buffer_start_ + (pos - buffer_pos_); 117 } else { 118 ReadBlockAt(pos); 119 } 120 } 121 122 // Returns true if the stream could access the V8 heap after construction. can_be_cloned_for_parallel_access()123 bool can_be_cloned_for_parallel_access() const { 124 return can_be_cloned() && !can_access_heap(); 125 } 126 127 // Returns true if the stream can be cloned with Clone. 128 // TODO(rmcilroy): Remove this once ChunkedStreams can be cloned. 129 virtual bool can_be_cloned() const = 0; 130 131 // Clones the character stream to enable another independent scanner to access 132 // the same underlying stream. 133 virtual std::unique_ptr<Utf16CharacterStream> Clone() const = 0; 134 135 // Returns true if the stream could access the V8 heap after construction. 136 virtual bool can_access_heap() const = 0; 137 runtime_call_stats()138 RuntimeCallStats* runtime_call_stats() const { return runtime_call_stats_; } set_runtime_call_stats(RuntimeCallStats * runtime_call_stats)139 void set_runtime_call_stats(RuntimeCallStats* runtime_call_stats) { 140 runtime_call_stats_ = runtime_call_stats; 141 } 142 143 protected: Utf16CharacterStream(const uint16_t * buffer_start,const uint16_t * buffer_cursor,const uint16_t * buffer_end,size_t buffer_pos)144 Utf16CharacterStream(const uint16_t* buffer_start, 145 const uint16_t* buffer_cursor, 146 const uint16_t* buffer_end, size_t buffer_pos) 147 : buffer_start_(buffer_start), 148 buffer_cursor_(buffer_cursor), 149 buffer_end_(buffer_end), 150 buffer_pos_(buffer_pos) {} Utf16CharacterStream()151 Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {} 152 ReadBlockChecked()153 bool ReadBlockChecked() { 154 size_t position = pos(); 155 USE(position); 156 bool success = !has_parser_error() && ReadBlock(); 157 158 // Post-conditions: 1, We should always be at the right position. 159 // 2, Cursor should be inside the buffer. 160 // 3, We should have more characters available iff success. 161 DCHECK_EQ(pos(), position); 162 DCHECK_LE(buffer_cursor_, buffer_end_); 163 DCHECK_LE(buffer_start_, buffer_cursor_); 164 DCHECK_EQ(success, buffer_cursor_ < buffer_end_); 165 return success; 166 } 167 ReadBlockAt(size_t new_pos)168 void ReadBlockAt(size_t new_pos) { 169 // The callers of this method (Back/Back2/Seek) should handle the easy 170 // case (seeking within the current buffer), and we should only get here 171 // if we actually require new data. 172 // (This is really an efficiency check, not a correctness invariant.) 173 DCHECK(new_pos < buffer_pos_ || 174 new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_)); 175 176 // Change pos() to point to new_pos. 177 buffer_pos_ = new_pos; 178 buffer_cursor_ = buffer_start_; 179 DCHECK_EQ(pos(), new_pos); 180 ReadBlockChecked(); 181 } 182 183 // Read more data, and update buffer_*_ to point to it. 184 // Returns true if more data was available. 185 // 186 // ReadBlock() may modify any of the buffer_*_ members, but must sure that 187 // the result of pos() remains unaffected. 188 // 189 // Examples: 190 // - a stream could either fill a separate buffer. Then buffer_start_ and 191 // buffer_cursor_ would point to the beginning of the buffer, and 192 // buffer_pos would be the old pos(). 193 // - a stream with existing buffer chunks would set buffer_start_ and 194 // buffer_end_ to cover the full chunk, and then buffer_cursor_ would 195 // point into the middle of the buffer, while buffer_pos_ would describe 196 // the start of the buffer. 197 virtual bool ReadBlock() = 0; 198 199 const uint16_t* buffer_start_; 200 const uint16_t* buffer_cursor_; 201 const uint16_t* buffer_end_; 202 size_t buffer_pos_; 203 RuntimeCallStats* runtime_call_stats_; 204 bool has_parser_error_ = false; 205 }; 206 207 // ---------------------------------------------------------------------------- 208 // JavaScript Scanner. 209 210 class V8_EXPORT_PRIVATE Scanner { 211 public: 212 // Scoped helper for a re-settable bookmark. 213 class V8_EXPORT_PRIVATE BookmarkScope { 214 public: BookmarkScope(Scanner * scanner)215 explicit BookmarkScope(Scanner* scanner) 216 : scanner_(scanner), 217 bookmark_(kNoBookmark), 218 had_parser_error_(scanner->has_parser_error()) { 219 DCHECK_NOT_NULL(scanner_); 220 } 221 ~BookmarkScope() = default; 222 223 void Set(size_t bookmark); 224 void Apply(); 225 bool HasBeenSet() const; 226 bool HasBeenApplied() const; 227 228 private: 229 static const size_t kNoBookmark; 230 static const size_t kBookmarkWasApplied; 231 232 Scanner* scanner_; 233 size_t bookmark_; 234 bool had_parser_error_; 235 236 DISALLOW_COPY_AND_ASSIGN(BookmarkScope); 237 }; 238 239 // Sets the Scanner into an error state to stop further scanning and terminate 240 // the parsing by only returning ILLEGAL tokens after that. set_parser_error()241 V8_INLINE void set_parser_error() { 242 if (!has_parser_error()) { 243 c0_ = kEndOfInput; 244 source_->set_parser_error(); 245 for (TokenDesc& desc : token_storage_) desc.token = Token::ILLEGAL; 246 } 247 } reset_parser_error_flag()248 V8_INLINE void reset_parser_error_flag() { 249 source_->reset_parser_error_flag(); 250 } has_parser_error()251 V8_INLINE bool has_parser_error() const { 252 return source_->has_parser_error(); 253 } 254 255 // Representation of an interval of source positions. 256 struct Location { LocationLocation257 Location(int b, int e) : beg_pos(b), end_pos(e) { } LocationLocation258 Location() : beg_pos(0), end_pos(0) { } 259 lengthLocation260 int length() const { return end_pos - beg_pos; } IsValidLocation261 bool IsValid() const { return base::IsInRange(beg_pos, 0, end_pos); } 262 invalidLocation263 static Location invalid() { return Location(-1, 0); } 264 265 int beg_pos; 266 int end_pos; 267 }; 268 269 // -1 is outside of the range of any real source code. 270 static constexpr uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput; 271 static constexpr uc32 kInvalidSequence = static_cast<uc32>(-1); 272 Invalid()273 static constexpr uc32 Invalid() { return Scanner::kInvalidSequence; } 274 static bool IsInvalid(uc32 c); 275 276 explicit Scanner(Utf16CharacterStream* source, UnoptimizedCompileFlags flags); 277 278 void Initialize(); 279 280 // Returns the next token and advances input. 281 Token::Value Next(); 282 // Returns the token following peek() 283 Token::Value PeekAhead(); 284 // Returns the current token again. current_token()285 Token::Value current_token() const { return current().token; } 286 287 // Returns the location information for the current token 288 // (the token last returned by Next()). location()289 const Location& location() const { return current().location; } 290 291 // This error is specifically an invalid hex or unicode escape sequence. has_error()292 bool has_error() const { return scanner_error_ != MessageTemplate::kNone; } error()293 MessageTemplate error() const { return scanner_error_; } error_location()294 const Location& error_location() const { return scanner_error_location_; } 295 has_invalid_template_escape()296 bool has_invalid_template_escape() const { 297 return current().invalid_template_escape_message != MessageTemplate::kNone; 298 } invalid_template_escape_message()299 MessageTemplate invalid_template_escape_message() const { 300 DCHECK(has_invalid_template_escape()); 301 return current().invalid_template_escape_message; 302 } 303 clear_invalid_template_escape_message()304 void clear_invalid_template_escape_message() { 305 DCHECK(has_invalid_template_escape()); 306 current_->invalid_template_escape_message = MessageTemplate::kNone; 307 } 308 invalid_template_escape_location()309 Location invalid_template_escape_location() const { 310 DCHECK(has_invalid_template_escape()); 311 return current().invalid_template_escape_location; 312 } 313 314 // Similar functions for the upcoming token. 315 316 // One token look-ahead (past the token returned by Next()). peek()317 Token::Value peek() const { return next().token; } 318 peek_location()319 const Location& peek_location() const { return next().location; } 320 literal_contains_escapes()321 bool literal_contains_escapes() const { 322 return LiteralContainsEscapes(current()); 323 } 324 next_literal_contains_escapes()325 bool next_literal_contains_escapes() const { 326 return LiteralContainsEscapes(next()); 327 } 328 329 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory) const; 330 331 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory) const; 332 const AstRawString* CurrentRawSymbol( 333 AstValueFactory* ast_value_factory) const; 334 335 double DoubleValue(); 336 337 const char* CurrentLiteralAsCString(Zone* zone) const; 338 CurrentMatches(Token::Value token)339 inline bool CurrentMatches(Token::Value token) const { 340 DCHECK(Token::IsKeyword(token)); 341 return current().token == token; 342 } 343 344 template <size_t N> NextLiteralExactlyEquals(const char (& s)[N])345 bool NextLiteralExactlyEquals(const char (&s)[N]) { 346 DCHECK(next().CanAccessLiteral()); 347 // The length of the token is used to make sure the literal equals without 348 // taking escape sequences (e.g., "use \x73trict") or line continuations 349 // (e.g., "use \(newline) strict") into account. 350 if (!is_next_literal_one_byte()) return false; 351 if (peek_location().length() != N + 1) return false; 352 353 Vector<const uint8_t> next = next_literal_one_byte_string(); 354 const char* chars = reinterpret_cast<const char*>(next.begin()); 355 return next.length() == N - 1 && strncmp(s, chars, N - 1) == 0; 356 } 357 358 template <size_t N> CurrentLiteralEquals(const char (& s)[N])359 bool CurrentLiteralEquals(const char (&s)[N]) { 360 DCHECK(current().CanAccessLiteral()); 361 if (!is_literal_one_byte()) return false; 362 363 Vector<const uint8_t> current = literal_one_byte_string(); 364 const char* chars = reinterpret_cast<const char*>(current.begin()); 365 return current.length() == N - 1 && strncmp(s, chars, N - 1) == 0; 366 } 367 368 // Returns the location of the last seen octal literal. octal_position()369 Location octal_position() const { return octal_pos_; } clear_octal_position()370 void clear_octal_position() { 371 octal_pos_ = Location::invalid(); 372 octal_message_ = MessageTemplate::kNone; 373 } octal_message()374 MessageTemplate octal_message() const { return octal_message_; } 375 376 // Returns the value of the last smi that was scanned. smi_value()377 uint32_t smi_value() const { return current().smi_value_; } 378 379 // Seek forward to the given position. This operation does not 380 // work in general, for instance when there are pushed back 381 // characters, but works for seeking forward until simple delimiter 382 // tokens, which is what it is used for. 383 void SeekForward(int pos); 384 385 // Returns true if there was a line terminator before the peek'ed token, 386 // possibly inside a multi-line comment. HasLineTerminatorBeforeNext()387 bool HasLineTerminatorBeforeNext() const { 388 return next().after_line_terminator; 389 } 390 HasLineTerminatorAfterNext()391 bool HasLineTerminatorAfterNext() { 392 Token::Value ensure_next_next = PeekAhead(); 393 USE(ensure_next_next); 394 return next_next().after_line_terminator; 395 } 396 397 // Scans the input as a regular expression pattern, next token must be /(=). 398 // Returns true if a pattern is scanned. 399 bool ScanRegExpPattern(); 400 // Scans the input as regular expression flags. Returns the flags on success. 401 Maybe<int> ScanRegExpFlags(); 402 403 // Scans the input as a template literal ScanTemplateContinuation()404 Token::Value ScanTemplateContinuation() { 405 DCHECK_EQ(next().token, Token::RBRACE); 406 DCHECK_EQ(source_pos() - 1, next().location.beg_pos); 407 return ScanTemplateSpan(); 408 } 409 410 template <typename LocalIsolate> 411 Handle<String> SourceUrl(LocalIsolate* isolate) const; 412 template <typename LocalIsolate> 413 Handle<String> SourceMappingUrl(LocalIsolate* isolate) const; 414 FoundHtmlComment()415 bool FoundHtmlComment() const { return found_html_comment_; } 416 stream()417 const Utf16CharacterStream* stream() const { return source_; } 418 419 private: 420 // Scoped helper for saving & restoring scanner error state. 421 // This is used for tagged template literals, in which normally forbidden 422 // escape sequences are allowed. 423 class ErrorState; 424 425 // The current and look-ahead token. 426 struct TokenDesc { 427 Location location = {0, 0}; 428 LiteralBuffer literal_chars; 429 LiteralBuffer raw_literal_chars; 430 Token::Value token = Token::UNINITIALIZED; 431 MessageTemplate invalid_template_escape_message = MessageTemplate::kNone; 432 Location invalid_template_escape_location; 433 uint32_t smi_value_ = 0; 434 bool after_line_terminator = false; 435 436 #ifdef DEBUG CanAccessLiteralTokenDesc437 bool CanAccessLiteral() const { 438 return token == Token::PRIVATE_NAME || token == Token::ILLEGAL || 439 token == Token::ESCAPED_KEYWORD || token == Token::UNINITIALIZED || 440 token == Token::REGEXP_LITERAL || 441 base::IsInRange(token, Token::NUMBER, Token::STRING) || 442 Token::IsAnyIdentifier(token) || Token::IsKeyword(token) || 443 base::IsInRange(token, Token::TEMPLATE_SPAN, Token::TEMPLATE_TAIL); 444 } CanAccessRawLiteralTokenDesc445 bool CanAccessRawLiteral() const { 446 return token == Token::ILLEGAL || token == Token::UNINITIALIZED || 447 base::IsInRange(token, Token::TEMPLATE_SPAN, Token::TEMPLATE_TAIL); 448 } 449 #endif // DEBUG 450 }; 451 452 enum NumberKind { 453 IMPLICIT_OCTAL, 454 BINARY, 455 OCTAL, 456 HEX, 457 DECIMAL, 458 DECIMAL_WITH_LEADING_ZERO 459 }; 460 IsValidBigIntKind(NumberKind kind)461 inline bool IsValidBigIntKind(NumberKind kind) { 462 return base::IsInRange(kind, BINARY, DECIMAL); 463 } 464 IsDecimalNumberKind(NumberKind kind)465 inline bool IsDecimalNumberKind(NumberKind kind) { 466 return base::IsInRange(kind, DECIMAL, DECIMAL_WITH_LEADING_ZERO); 467 } 468 469 static const int kCharacterLookaheadBufferSize = 1; 470 static const int kMaxAscii = 127; 471 472 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. 473 template <bool capture_raw> 474 uc32 ScanOctalEscape(uc32 c, int length); 475 476 // Call this after setting source_ to the input. Init()477 void Init() { 478 // Set c0_ (one character ahead) 479 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); 480 Advance(); 481 482 current_ = &token_storage_[0]; 483 next_ = &token_storage_[1]; 484 next_next_ = &token_storage_[2]; 485 486 found_html_comment_ = false; 487 scanner_error_ = MessageTemplate::kNone; 488 } 489 ReportScannerError(const Location & location,MessageTemplate error)490 void ReportScannerError(const Location& location, MessageTemplate error) { 491 if (has_error()) return; 492 scanner_error_ = error; 493 scanner_error_location_ = location; 494 } 495 ReportScannerError(int pos,MessageTemplate error)496 void ReportScannerError(int pos, MessageTemplate error) { 497 if (has_error()) return; 498 scanner_error_ = error; 499 scanner_error_location_ = Location(pos, pos + 1); 500 } 501 502 // Seek to the next_ token at the given position. 503 void SeekNext(size_t position); 504 AddLiteralChar(uc32 c)505 V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); } 506 AddLiteralChar(char c)507 V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); } 508 AddRawLiteralChar(uc32 c)509 V8_INLINE void AddRawLiteralChar(uc32 c) { 510 next().raw_literal_chars.AddChar(c); 511 } 512 AddLiteralCharAdvance()513 V8_INLINE void AddLiteralCharAdvance() { 514 AddLiteralChar(c0_); 515 Advance(); 516 } 517 518 // Low-level scanning support. 519 template <bool capture_raw = false> Advance()520 void Advance() { 521 if (capture_raw) { 522 AddRawLiteralChar(c0_); 523 } 524 c0_ = source_->Advance(); 525 } 526 527 template <typename FunctionType> AdvanceUntil(FunctionType check)528 V8_INLINE void AdvanceUntil(FunctionType check) { 529 c0_ = source_->AdvanceUntil(check); 530 } 531 CombineSurrogatePair()532 bool CombineSurrogatePair() { 533 DCHECK(!unibrow::Utf16::IsLeadSurrogate(kEndOfInput)); 534 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { 535 uc32 c1 = source_->Advance(); 536 DCHECK(!unibrow::Utf16::IsTrailSurrogate(kEndOfInput)); 537 if (unibrow::Utf16::IsTrailSurrogate(c1)) { 538 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); 539 return true; 540 } 541 source_->Back(); 542 } 543 return false; 544 } 545 PushBack(uc32 ch)546 void PushBack(uc32 ch) { 547 DCHECK(IsInvalid(c0_) || 548 base::IsInRange(c0_, 0u, unibrow::Utf16::kMaxNonSurrogateCharCode)); 549 source_->Back(); 550 c0_ = ch; 551 } 552 Peek()553 uc32 Peek() const { return source_->Peek(); } 554 Select(Token::Value tok)555 inline Token::Value Select(Token::Value tok) { 556 Advance(); 557 return tok; 558 } 559 Select(uc32 next,Token::Value then,Token::Value else_)560 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { 561 Advance(); 562 if (c0_ == next) { 563 Advance(); 564 return then; 565 } else { 566 return else_; 567 } 568 } 569 // Returns the literal string, if any, for the current token (the 570 // token last returned by Next()). The string is 0-terminated. 571 // Literal strings are collected for identifiers, strings, numbers as well 572 // as for template literals. For template literals we also collect the raw 573 // form. 574 // These functions only give the correct result if the literal was scanned 575 // when a LiteralScope object is alive. 576 // 577 // Current usage of these functions is unfortunately a little undisciplined, 578 // and is_literal_one_byte() + is_literal_one_byte_string() is also 579 // requested for tokens that do not have a literal. Hence, we treat any 580 // token as a one-byte literal. E.g. Token::FUNCTION pretends to have a 581 // literal "function". literal_one_byte_string()582 Vector<const uint8_t> literal_one_byte_string() const { 583 DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token) || 584 current().token == Token::ESCAPED_KEYWORD); 585 return current().literal_chars.one_byte_literal(); 586 } literal_two_byte_string()587 Vector<const uint16_t> literal_two_byte_string() const { 588 DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token) || 589 current().token == Token::ESCAPED_KEYWORD); 590 return current().literal_chars.two_byte_literal(); 591 } is_literal_one_byte()592 bool is_literal_one_byte() const { 593 DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token) || 594 current().token == Token::ESCAPED_KEYWORD); 595 return current().literal_chars.is_one_byte(); 596 } 597 // Returns the literal string for the next token (the token that 598 // would be returned if Next() were called). next_literal_one_byte_string()599 Vector<const uint8_t> next_literal_one_byte_string() const { 600 DCHECK(next().CanAccessLiteral()); 601 return next().literal_chars.one_byte_literal(); 602 } next_literal_two_byte_string()603 Vector<const uint16_t> next_literal_two_byte_string() const { 604 DCHECK(next().CanAccessLiteral()); 605 return next().literal_chars.two_byte_literal(); 606 } is_next_literal_one_byte()607 bool is_next_literal_one_byte() const { 608 DCHECK(next().CanAccessLiteral()); 609 return next().literal_chars.is_one_byte(); 610 } raw_literal_one_byte_string()611 Vector<const uint8_t> raw_literal_one_byte_string() const { 612 DCHECK(current().CanAccessRawLiteral()); 613 return current().raw_literal_chars.one_byte_literal(); 614 } raw_literal_two_byte_string()615 Vector<const uint16_t> raw_literal_two_byte_string() const { 616 DCHECK(current().CanAccessRawLiteral()); 617 return current().raw_literal_chars.two_byte_literal(); 618 } is_raw_literal_one_byte()619 bool is_raw_literal_one_byte() const { 620 DCHECK(current().CanAccessRawLiteral()); 621 return current().raw_literal_chars.is_one_byte(); 622 } 623 624 template <bool capture_raw, bool unicode = false> 625 uc32 ScanHexNumber(int expected_length); 626 // Scan a number of any length but not bigger than max_value. For example, the 627 // number can be 000000001, so it's very long in characters but its value is 628 // small. 629 template <bool capture_raw> 630 uc32 ScanUnlimitedLengthHexNumber(uc32 max_value, int beg_pos); 631 632 // Scans a single JavaScript token. 633 V8_INLINE Token::Value ScanSingleToken(); 634 V8_INLINE void Scan(); 635 // Performance hack: pass through a pre-calculated "next()" value to avoid 636 // having to re-calculate it in Scan. You'd think the compiler would be able 637 // to hoist the next() calculation out of the inlined Scan method, but seems 638 // that pointer aliasing analysis fails show that this is safe. 639 V8_INLINE void Scan(TokenDesc* next_desc); 640 641 V8_INLINE Token::Value SkipWhiteSpace(); 642 Token::Value SkipSingleHTMLComment(); 643 Token::Value SkipSingleLineComment(); 644 Token::Value SkipSourceURLComment(); 645 void TryToParseSourceURLComment(); 646 Token::Value SkipMultiLineComment(); 647 // Scans a possible HTML comment -- begins with '<!'. 648 Token::Value ScanHtmlComment(); 649 650 bool ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch), 651 bool is_check_first_digit); 652 bool ScanDecimalDigits(bool allow_numeric_separator); 653 // Optimized function to scan decimal number as Smi. 654 bool ScanDecimalAsSmi(uint64_t* value, bool allow_numeric_separator); 655 bool ScanDecimalAsSmiWithNumericSeparators(uint64_t* value); 656 bool ScanHexDigits(); 657 bool ScanBinaryDigits(); 658 bool ScanSignedInteger(); 659 bool ScanOctalDigits(); 660 bool ScanImplicitOctalDigits(int start_pos, NumberKind* kind); 661 662 Token::Value ScanNumber(bool seen_period); 663 V8_INLINE Token::Value ScanIdentifierOrKeyword(); 664 V8_INLINE Token::Value ScanIdentifierOrKeywordInner(); 665 Token::Value ScanIdentifierOrKeywordInnerSlow(bool escaped, 666 bool can_be_keyword); 667 668 Token::Value ScanString(); 669 Token::Value ScanPrivateName(); 670 671 // Scans an escape-sequence which is part of a string and adds the 672 // decoded character to the current literal. Returns true if a pattern 673 // is scanned. 674 template <bool capture_raw> 675 bool ScanEscape(); 676 677 // Decodes a Unicode escape-sequence which is part of an identifier. 678 // If the escape sequence cannot be decoded the result is kBadChar. 679 uc32 ScanIdentifierUnicodeEscape(); 680 // Helper for the above functions. 681 template <bool capture_raw> 682 uc32 ScanUnicodeEscape(); 683 684 Token::Value ScanTemplateSpan(); 685 686 // Return the current source position. source_pos()687 int source_pos() { 688 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize; 689 } 690 LiteralContainsEscapes(const TokenDesc & token)691 static bool LiteralContainsEscapes(const TokenDesc& token) { 692 Location location = token.location; 693 int source_length = (location.end_pos - location.beg_pos); 694 if (token.token == Token::STRING) { 695 // Subtract delimiters. 696 source_length -= 2; 697 } 698 return token.literal_chars.length() != source_length; 699 } 700 701 #ifdef DEBUG 702 void SanityCheckTokenDesc(const TokenDesc&) const; 703 #endif 704 next()705 TokenDesc& next() { return *next_; } 706 current()707 const TokenDesc& current() const { return *current_; } next()708 const TokenDesc& next() const { return *next_; } next_next()709 const TokenDesc& next_next() const { return *next_next_; } 710 711 UnoptimizedCompileFlags flags_; 712 713 TokenDesc* current_; // desc for current token (as returned by Next()) 714 TokenDesc* next_; // desc for next token (one token look-ahead) 715 TokenDesc* next_next_; // desc for the token after next (after PeakAhead()) 716 717 // Input stream. Must be initialized to an Utf16CharacterStream. 718 Utf16CharacterStream* const source_; 719 720 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 721 uc32 c0_; 722 723 TokenDesc token_storage_[3]; 724 725 // Whether this scanner encountered an HTML comment. 726 bool found_html_comment_; 727 728 // Values parsed from magic comments. 729 LiteralBuffer source_url_; 730 LiteralBuffer source_mapping_url_; 731 732 // Last-seen positions of potentially problematic tokens. 733 Location octal_pos_; 734 MessageTemplate octal_message_; 735 736 MessageTemplate scanner_error_; 737 Location scanner_error_location_; 738 }; 739 740 } // namespace internal 741 } // namespace v8 742 743 #endif // V8_PARSING_SCANNER_H_ 744