1 // Copyright 2011 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Features shared by parsing and pre-parsing scanners. 6 7 #ifndef V8_PARSING_SCANNER_H_ 8 #define V8_PARSING_SCANNER_H_ 9 10 #include "src/allocation.h" 11 #include "src/base/hashmap.h" 12 #include "src/base/logging.h" 13 #include "src/char-predicates.h" 14 #include "src/collector.h" 15 #include "src/globals.h" 16 #include "src/list.h" 17 #include "src/messages.h" 18 #include "src/parsing/token.h" 19 #include "src/unicode-decoder.h" 20 #include "src/unicode.h" 21 22 namespace v8 { 23 namespace internal { 24 25 26 class AstRawString; 27 class AstValueFactory; 28 class ParserRecorder; 29 class UnicodeCache; 30 31 32 // --------------------------------------------------------------------- 33 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. 34 // A code unit is a 16 bit value representing either a 16 bit code point 35 // or one part of a surrogate pair that make a single 21 bit code point. 36 37 class Utf16CharacterStream { 38 public: Utf16CharacterStream()39 Utf16CharacterStream() : pos_(0) { } ~Utf16CharacterStream()40 virtual ~Utf16CharacterStream() { } 41 42 // Returns and advances past the next UTF-16 code unit in the input 43 // stream. If there are no more code units, it returns a negative 44 // value. Advance()45 inline uc32 Advance() { 46 if (buffer_cursor_ < buffer_end_ || ReadBlock()) { 47 pos_++; 48 return static_cast<uc32>(*(buffer_cursor_++)); 49 } 50 // Note: currently the following increment is necessary to avoid a 51 // parser problem! The scanner treats the final kEndOfInput as 52 // a code unit with a position, and does math relative to that 53 // position. 54 pos_++; 55 56 return kEndOfInput; 57 } 58 59 // Return the current position in the code unit stream. 60 // Starts at zero. pos()61 inline size_t pos() const { return pos_; } 62 63 // Skips forward past the next code_unit_count UTF-16 code units 64 // in the input, or until the end of input if that comes sooner. 65 // Returns the number of code units actually skipped. If less 66 // than code_unit_count, SeekForward(size_t code_unit_count)67 inline size_t SeekForward(size_t code_unit_count) { 68 size_t buffered_chars = buffer_end_ - buffer_cursor_; 69 if (code_unit_count <= buffered_chars) { 70 buffer_cursor_ += code_unit_count; 71 pos_ += code_unit_count; 72 return code_unit_count; 73 } 74 return SlowSeekForward(code_unit_count); 75 } 76 77 // Pushes back the most recently read UTF-16 code unit (or negative 78 // value if at end of input), i.e., the value returned by the most recent 79 // call to Advance. 80 // Must not be used right after calling SeekForward. 81 virtual void PushBack(int32_t code_unit) = 0; 82 83 virtual bool SetBookmark(); 84 virtual void ResetToBookmark(); 85 86 protected: 87 static const uc32 kEndOfInput = -1; 88 89 // Ensures that the buffer_cursor_ points to the code_unit at 90 // position pos_ of the input, if possible. If the position 91 // is at or after the end of the input, return false. If there 92 // are more code_units available, return true. 93 virtual bool ReadBlock() = 0; 94 virtual size_t SlowSeekForward(size_t code_unit_count) = 0; 95 96 const uint16_t* buffer_cursor_; 97 const uint16_t* buffer_end_; 98 size_t pos_; 99 }; 100 101 102 // --------------------------------------------------------------------- 103 // DuplicateFinder discovers duplicate symbols. 104 105 class DuplicateFinder { 106 public: DuplicateFinder(UnicodeCache * constants)107 explicit DuplicateFinder(UnicodeCache* constants) 108 : unicode_constants_(constants), 109 backing_store_(16), 110 map_(&Match) { } 111 112 int AddOneByteSymbol(Vector<const uint8_t> key, int value); 113 int AddTwoByteSymbol(Vector<const uint16_t> key, int value); 114 // Add a a number literal by converting it (if necessary) 115 // to the string that ToString(ToNumber(literal)) would generate. 116 // and then adding that string with AddOneByteSymbol. 117 // This string is the actual value used as key in an object literal, 118 // and the one that must be different from the other keys. 119 int AddNumber(Vector<const uint8_t> key, int value); 120 121 private: 122 int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value); 123 // Backs up the key and its length in the backing store. 124 // The backup is stored with a base 127 encoding of the 125 // length (plus a bit saying whether the string is one byte), 126 // followed by the bytes of the key. 127 uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte); 128 129 // Compare two encoded keys (both pointing into the backing store) 130 // for having the same base-127 encoded lengths and representation. 131 // and then having the same 'length' bytes following. 132 static bool Match(void* first, void* second); 133 // Creates a hash from a sequence of bytes. 134 static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte); 135 // Checks whether a string containing a JS number is its canonical 136 // form. 137 static bool IsNumberCanonical(Vector<const uint8_t> key); 138 139 // Size of buffer. Sufficient for using it to call DoubleToCString in 140 // from conversions.h. 141 static const int kBufferSize = 100; 142 143 UnicodeCache* unicode_constants_; 144 // Backing store used to store strings used as hashmap keys. 145 SequenceCollector<unsigned char> backing_store_; 146 base::HashMap map_; 147 // Buffer used for string->number->canonical string conversions. 148 char number_buffer_[kBufferSize]; 149 }; 150 151 // ---------------------------------------------------------------------------- 152 // LiteralBuffer - Collector of chars of literals. 153 154 const int kMaxAscii = 127; 155 156 class LiteralBuffer { 157 public: LiteralBuffer()158 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { } 159 ~LiteralBuffer()160 ~LiteralBuffer() { backing_store_.Dispose(); } 161 INLINE(void AddChar (char code_unit))162 INLINE(void AddChar(char code_unit)) { 163 if (position_ >= backing_store_.length()) ExpandBuffer(); 164 DCHECK(is_one_byte_); 165 DCHECK(0 <= code_unit && code_unit <= kMaxAscii); 166 backing_store_[position_] = static_cast<byte>(code_unit); 167 position_ += kOneByteSize; 168 return; 169 } 170 INLINE(void AddChar (uc32 code_unit))171 INLINE(void AddChar(uc32 code_unit)) { 172 if (position_ >= backing_store_.length()) ExpandBuffer(); 173 if (is_one_byte_) { 174 if (code_unit <= unibrow::Latin1::kMaxChar) { 175 backing_store_[position_] = static_cast<byte>(code_unit); 176 position_ += kOneByteSize; 177 return; 178 } 179 ConvertToTwoByte(); 180 } 181 if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) { 182 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit; 183 position_ += kUC16Size; 184 } else { 185 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = 186 unibrow::Utf16::LeadSurrogate(code_unit); 187 position_ += kUC16Size; 188 if (position_ >= backing_store_.length()) ExpandBuffer(); 189 *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = 190 unibrow::Utf16::TrailSurrogate(code_unit); 191 position_ += kUC16Size; 192 } 193 } 194 is_one_byte()195 bool is_one_byte() const { return is_one_byte_; } 196 is_contextual_keyword(Vector<const char> keyword)197 bool is_contextual_keyword(Vector<const char> keyword) const { 198 return is_one_byte() && keyword.length() == position_ && 199 (memcmp(keyword.start(), backing_store_.start(), position_) == 0); 200 } 201 two_byte_literal()202 Vector<const uint16_t> two_byte_literal() const { 203 DCHECK(!is_one_byte_); 204 DCHECK((position_ & 0x1) == 0); 205 return Vector<const uint16_t>( 206 reinterpret_cast<const uint16_t*>(backing_store_.start()), 207 position_ >> 1); 208 } 209 one_byte_literal()210 Vector<const uint8_t> one_byte_literal() const { 211 DCHECK(is_one_byte_); 212 return Vector<const uint8_t>( 213 reinterpret_cast<const uint8_t*>(backing_store_.start()), 214 position_); 215 } 216 length()217 int length() const { 218 return is_one_byte_ ? position_ : (position_ >> 1); 219 } 220 ReduceLength(int delta)221 void ReduceLength(int delta) { 222 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); 223 } 224 Reset()225 void Reset() { 226 position_ = 0; 227 is_one_byte_ = true; 228 } 229 230 Handle<String> Internalize(Isolate* isolate) const; 231 CopyFrom(const LiteralBuffer * other)232 void CopyFrom(const LiteralBuffer* other) { 233 if (other == nullptr) { 234 Reset(); 235 } else { 236 is_one_byte_ = other->is_one_byte_; 237 position_ = other->position_; 238 if (position_ < backing_store_.length()) { 239 std::copy(other->backing_store_.begin(), 240 other->backing_store_.begin() + position_, 241 backing_store_.begin()); 242 } else { 243 backing_store_.Dispose(); 244 backing_store_ = other->backing_store_.Clone(); 245 } 246 } 247 } 248 249 private: 250 static const int kInitialCapacity = 16; 251 static const int kGrowthFactory = 4; 252 static const int kMinConversionSlack = 256; 253 static const int kMaxGrowth = 1 * MB; NewCapacity(int min_capacity)254 inline int NewCapacity(int min_capacity) { 255 int capacity = Max(min_capacity, backing_store_.length()); 256 int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); 257 return new_capacity; 258 } 259 ExpandBuffer()260 void ExpandBuffer() { 261 Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity)); 262 MemCopy(new_store.start(), backing_store_.start(), position_); 263 backing_store_.Dispose(); 264 backing_store_ = new_store; 265 } 266 ConvertToTwoByte()267 void ConvertToTwoByte() { 268 DCHECK(is_one_byte_); 269 Vector<byte> new_store; 270 int new_content_size = position_ * kUC16Size; 271 if (new_content_size >= backing_store_.length()) { 272 // Ensure room for all currently read code units as UC16 as well 273 // as the code unit about to be stored. 274 new_store = Vector<byte>::New(NewCapacity(new_content_size)); 275 } else { 276 new_store = backing_store_; 277 } 278 uint8_t* src = backing_store_.start(); 279 uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start()); 280 for (int i = position_ - 1; i >= 0; i--) { 281 dst[i] = src[i]; 282 } 283 if (new_store.start() != backing_store_.start()) { 284 backing_store_.Dispose(); 285 backing_store_ = new_store; 286 } 287 position_ = new_content_size; 288 is_one_byte_ = false; 289 } 290 291 bool is_one_byte_; 292 int position_; 293 Vector<byte> backing_store_; 294 295 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); 296 }; 297 298 299 // ---------------------------------------------------------------------------- 300 // JavaScript Scanner. 301 302 class Scanner { 303 public: 304 // Scoped helper for literal recording. Automatically drops the literal 305 // if aborting the scanning before it's complete. 306 class LiteralScope { 307 public: LiteralScope(Scanner * self)308 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { 309 scanner_->StartLiteral(); 310 } ~LiteralScope()311 ~LiteralScope() { 312 if (!complete_) scanner_->DropLiteral(); 313 } Complete()314 void Complete() { 315 complete_ = true; 316 } 317 318 private: 319 Scanner* scanner_; 320 bool complete_; 321 }; 322 323 // Scoped helper for a re-settable bookmark. 324 class BookmarkScope { 325 public: BookmarkScope(Scanner * scanner)326 explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) { 327 DCHECK_NOT_NULL(scanner_); 328 } ~BookmarkScope()329 ~BookmarkScope() { scanner_->DropBookmark(); } 330 Set()331 bool Set() { return scanner_->SetBookmark(); } Reset()332 void Reset() { scanner_->ResetToBookmark(); } HasBeenSet()333 bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); } HasBeenReset()334 bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); } 335 336 private: 337 Scanner* scanner_; 338 339 DISALLOW_COPY_AND_ASSIGN(BookmarkScope); 340 }; 341 342 // Representation of an interval of source positions. 343 struct Location { LocationLocation344 Location(int b, int e) : beg_pos(b), end_pos(e) { } LocationLocation345 Location() : beg_pos(0), end_pos(0) { } 346 IsValidLocation347 bool IsValid() const { 348 return beg_pos >= 0 && end_pos >= beg_pos; 349 } 350 invalidLocation351 static Location invalid() { return Location(-1, -1); } 352 353 int beg_pos; 354 int end_pos; 355 }; 356 357 // -1 is outside of the range of any real source code. 358 static const int kNoOctalLocation = -1; 359 360 explicit Scanner(UnicodeCache* scanner_contants); 361 362 void Initialize(Utf16CharacterStream* source); 363 364 // Returns the next token and advances input. 365 Token::Value Next(); 366 // Returns the token following peek() 367 Token::Value PeekAhead(); 368 // Returns the current token again. current_token()369 Token::Value current_token() { return current_.token; } 370 // Returns the location information for the current token 371 // (the token last returned by Next()). location()372 Location location() const { return current_.location; } 373 has_error()374 bool has_error() const { return scanner_error_ != MessageTemplate::kNone; } error()375 MessageTemplate::Template error() const { return scanner_error_; } error_location()376 Location error_location() const { return scanner_error_location_; } 377 378 // Similar functions for the upcoming token. 379 380 // One token look-ahead (past the token returned by Next()). peek()381 Token::Value peek() const { return next_.token; } 382 peek_location()383 Location peek_location() const { return next_.location; } 384 literal_contains_escapes()385 bool literal_contains_escapes() const { 386 return LiteralContainsEscapes(current_); 387 } next_literal_contains_escapes()388 bool next_literal_contains_escapes() const { 389 return LiteralContainsEscapes(next_); 390 } is_literal_contextual_keyword(Vector<const char> keyword)391 bool is_literal_contextual_keyword(Vector<const char> keyword) { 392 DCHECK_NOT_NULL(current_.literal_chars); 393 return current_.literal_chars->is_contextual_keyword(keyword); 394 } is_next_contextual_keyword(Vector<const char> keyword)395 bool is_next_contextual_keyword(Vector<const char> keyword) { 396 DCHECK_NOT_NULL(next_.literal_chars); 397 return next_.literal_chars->is_contextual_keyword(keyword); 398 } 399 400 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); 401 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); 402 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); 403 404 double DoubleValue(); 405 bool ContainsDot(); 406 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { 407 if (is_literal_one_byte() && 408 literal_length() == length && 409 (allow_escapes || !literal_contains_escapes())) { 410 const char* token = 411 reinterpret_cast<const char*>(literal_one_byte_string().start()); 412 return !strncmp(token, data, length); 413 } 414 return false; 415 } UnescapedLiteralMatches(const char * data,int length)416 inline bool UnescapedLiteralMatches(const char* data, int length) { 417 return LiteralMatches(data, length, false); 418 } 419 IsGetOrSet(bool * is_get,bool * is_set)420 void IsGetOrSet(bool* is_get, bool* is_set) { 421 if (is_literal_one_byte() && 422 literal_length() == 3 && 423 !literal_contains_escapes()) { 424 const char* token = 425 reinterpret_cast<const char*>(literal_one_byte_string().start()); 426 *is_get = strncmp(token, "get", 3) == 0; 427 *is_set = !*is_get && strncmp(token, "set", 3) == 0; 428 } 429 } 430 431 int FindSymbol(DuplicateFinder* finder, int value); 432 unicode_cache()433 UnicodeCache* unicode_cache() { return unicode_cache_; } 434 435 // Returns the location of the last seen octal literal. octal_position()436 Location octal_position() const { return octal_pos_; } clear_octal_position()437 void clear_octal_position() { octal_pos_ = Location::invalid(); } 438 // Returns the location of the last seen decimal literal with a leading zero. decimal_with_leading_zero_position()439 Location decimal_with_leading_zero_position() const { 440 return decimal_with_leading_zero_pos_; 441 } clear_decimal_with_leading_zero_position()442 void clear_decimal_with_leading_zero_position() { 443 decimal_with_leading_zero_pos_ = Location::invalid(); 444 } 445 446 // Returns the value of the last smi that was scanned. smi_value()447 int smi_value() const { return current_.smi_value_; } 448 449 // Seek forward to the given position. This operation does not 450 // work in general, for instance when there are pushed back 451 // characters, but works for seeking forward until simple delimiter 452 // tokens, which is what it is used for. 453 void SeekForward(int pos); 454 455 // Returns true if there was a line terminator before the peek'ed token, 456 // possibly inside a multi-line comment. HasAnyLineTerminatorBeforeNext()457 bool HasAnyLineTerminatorBeforeNext() const { 458 return has_line_terminator_before_next_ || 459 has_multiline_comment_before_next_; 460 } 461 HasAnyLineTerminatorAfterNext()462 bool HasAnyLineTerminatorAfterNext() { 463 Token::Value ensure_next_next = PeekAhead(); 464 USE(ensure_next_next); 465 return has_line_terminator_after_next_; 466 } 467 468 // Scans the input as a regular expression pattern, previous 469 // character(s) must be /(=). Returns true if a pattern is scanned. 470 bool ScanRegExpPattern(bool seen_equal); 471 // Scans the input as regular expression flags. Returns the flags on success. 472 Maybe<RegExp::Flags> ScanRegExpFlags(); 473 474 // Scans the input as a template literal 475 Token::Value ScanTemplateStart(); 476 Token::Value ScanTemplateContinuation(); 477 source_url()478 const LiteralBuffer* source_url() const { return &source_url_; } source_mapping_url()479 const LiteralBuffer* source_mapping_url() const { 480 return &source_mapping_url_; 481 } 482 483 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; 484 FoundHtmlComment()485 bool FoundHtmlComment() const { return found_html_comment_; } 486 487 #define DECLARE_ACCESSORS(name) \ 488 inline bool allow_##name() const { return allow_##name##_; } \ 489 inline void set_allow_##name(bool allow) { allow_##name##_ = allow; } 490 DECLARE_ACCESSORS(harmony_exponentiation_operator) 491 #undef ACCESSOR 492 493 private: 494 // The current and look-ahead token. 495 struct TokenDesc { 496 Token::Value token; 497 Location location; 498 LiteralBuffer* literal_chars; 499 LiteralBuffer* raw_literal_chars; 500 int smi_value_; 501 }; 502 503 static const int kCharacterLookaheadBufferSize = 1; 504 505 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. 506 template <bool capture_raw> 507 uc32 ScanOctalEscape(uc32 c, int length); 508 509 // Call this after setting source_ to the input. Init()510 void Init() { 511 // Set c0_ (one character ahead) 512 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); 513 Advance(); 514 // Initialize current_ to not refer to a literal. 515 current_.literal_chars = NULL; 516 current_.raw_literal_chars = NULL; 517 next_next_.token = Token::UNINITIALIZED; 518 found_html_comment_ = false; 519 scanner_error_ = MessageTemplate::kNone; 520 } 521 522 // Support BookmarkScope functionality. 523 bool SetBookmark(); 524 void ResetToBookmark(); 525 bool BookmarkHasBeenSet(); 526 bool BookmarkHasBeenReset(); 527 void DropBookmark(); 528 static void CopyTokenDesc(TokenDesc* to, TokenDesc* from); 529 ReportScannerError(const Location & location,MessageTemplate::Template error)530 void ReportScannerError(const Location& location, 531 MessageTemplate::Template error) { 532 if (has_error()) return; 533 scanner_error_ = error; 534 scanner_error_location_ = location; 535 } 536 ReportScannerError(int pos,MessageTemplate::Template error)537 void ReportScannerError(int pos, MessageTemplate::Template error) { 538 if (has_error()) return; 539 scanner_error_ = error; 540 scanner_error_location_ = Location(pos, pos + 1); 541 } 542 543 // Literal buffer support StartLiteral()544 inline void StartLiteral() { 545 LiteralBuffer* free_buffer = 546 (current_.literal_chars == &literal_buffer0_) 547 ? &literal_buffer1_ 548 : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_ 549 : &literal_buffer0_; 550 free_buffer->Reset(); 551 next_.literal_chars = free_buffer; 552 } 553 StartRawLiteral()554 inline void StartRawLiteral() { 555 LiteralBuffer* free_buffer = 556 (current_.raw_literal_chars == &raw_literal_buffer0_) 557 ? &raw_literal_buffer1_ 558 : (current_.raw_literal_chars == &raw_literal_buffer1_) 559 ? &raw_literal_buffer2_ 560 : &raw_literal_buffer0_; 561 free_buffer->Reset(); 562 next_.raw_literal_chars = free_buffer; 563 } 564 INLINE(void AddLiteralChar (uc32 c))565 INLINE(void AddLiteralChar(uc32 c)) { 566 DCHECK_NOT_NULL(next_.literal_chars); 567 next_.literal_chars->AddChar(c); 568 } 569 INLINE(void AddLiteralChar (char c))570 INLINE(void AddLiteralChar(char c)) { 571 DCHECK_NOT_NULL(next_.literal_chars); 572 next_.literal_chars->AddChar(c); 573 } 574 INLINE(void AddRawLiteralChar (uc32 c))575 INLINE(void AddRawLiteralChar(uc32 c)) { 576 DCHECK_NOT_NULL(next_.raw_literal_chars); 577 next_.raw_literal_chars->AddChar(c); 578 } 579 INLINE(void ReduceRawLiteralLength (int delta))580 INLINE(void ReduceRawLiteralLength(int delta)) { 581 DCHECK_NOT_NULL(next_.raw_literal_chars); 582 next_.raw_literal_chars->ReduceLength(delta); 583 } 584 585 // Stops scanning of a literal and drop the collected characters, 586 // e.g., due to an encountered error. DropLiteral()587 inline void DropLiteral() { 588 next_.literal_chars = NULL; 589 next_.raw_literal_chars = NULL; 590 } 591 AddLiteralCharAdvance()592 inline void AddLiteralCharAdvance() { 593 AddLiteralChar(c0_); 594 Advance(); 595 } 596 597 // Low-level scanning support. 598 template <bool capture_raw = false, bool check_surrogate = true> Advance()599 void Advance() { 600 if (capture_raw) { 601 AddRawLiteralChar(c0_); 602 } 603 c0_ = source_->Advance(); 604 if (check_surrogate) HandleLeadSurrogate(); 605 } 606 HandleLeadSurrogate()607 void HandleLeadSurrogate() { 608 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { 609 uc32 c1 = source_->Advance(); 610 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { 611 source_->PushBack(c1); 612 } else { 613 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); 614 } 615 } 616 } 617 PushBack(uc32 ch)618 void PushBack(uc32 ch) { 619 if (c0_ > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { 620 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); 621 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); 622 } else { 623 source_->PushBack(c0_); 624 } 625 c0_ = ch; 626 } 627 Select(Token::Value tok)628 inline Token::Value Select(Token::Value tok) { 629 Advance(); 630 return tok; 631 } 632 Select(uc32 next,Token::Value then,Token::Value else_)633 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { 634 Advance(); 635 if (c0_ == next) { 636 Advance(); 637 return then; 638 } else { 639 return else_; 640 } 641 } 642 643 // Returns the literal string, if any, for the current token (the 644 // token last returned by Next()). The string is 0-terminated. 645 // Literal strings are collected for identifiers, strings, numbers as well 646 // as for template literals. For template literals we also collect the raw 647 // form. 648 // These functions only give the correct result if the literal was scanned 649 // when a LiteralScope object is alive. literal_one_byte_string()650 Vector<const uint8_t> literal_one_byte_string() { 651 DCHECK_NOT_NULL(current_.literal_chars); 652 return current_.literal_chars->one_byte_literal(); 653 } literal_two_byte_string()654 Vector<const uint16_t> literal_two_byte_string() { 655 DCHECK_NOT_NULL(current_.literal_chars); 656 return current_.literal_chars->two_byte_literal(); 657 } is_literal_one_byte()658 bool is_literal_one_byte() { 659 DCHECK_NOT_NULL(current_.literal_chars); 660 return current_.literal_chars->is_one_byte(); 661 } literal_length()662 int literal_length() const { 663 DCHECK_NOT_NULL(current_.literal_chars); 664 return current_.literal_chars->length(); 665 } 666 // Returns the literal string for the next token (the token that 667 // would be returned if Next() were called). next_literal_one_byte_string()668 Vector<const uint8_t> next_literal_one_byte_string() { 669 DCHECK_NOT_NULL(next_.literal_chars); 670 return next_.literal_chars->one_byte_literal(); 671 } next_literal_two_byte_string()672 Vector<const uint16_t> next_literal_two_byte_string() { 673 DCHECK_NOT_NULL(next_.literal_chars); 674 return next_.literal_chars->two_byte_literal(); 675 } is_next_literal_one_byte()676 bool is_next_literal_one_byte() { 677 DCHECK_NOT_NULL(next_.literal_chars); 678 return next_.literal_chars->is_one_byte(); 679 } raw_literal_one_byte_string()680 Vector<const uint8_t> raw_literal_one_byte_string() { 681 DCHECK_NOT_NULL(current_.raw_literal_chars); 682 return current_.raw_literal_chars->one_byte_literal(); 683 } raw_literal_two_byte_string()684 Vector<const uint16_t> raw_literal_two_byte_string() { 685 DCHECK_NOT_NULL(current_.raw_literal_chars); 686 return current_.raw_literal_chars->two_byte_literal(); 687 } is_raw_literal_one_byte()688 bool is_raw_literal_one_byte() { 689 DCHECK_NOT_NULL(current_.raw_literal_chars); 690 return current_.raw_literal_chars->is_one_byte(); 691 } 692 693 template <bool capture_raw, bool unicode = false> 694 uc32 ScanHexNumber(int expected_length); 695 // Scan a number of any length but not bigger than max_value. For example, the 696 // number can be 000000001, so it's very long in characters but its value is 697 // small. 698 template <bool capture_raw> 699 uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos); 700 701 // Scans a single JavaScript token. 702 void Scan(); 703 704 bool SkipWhiteSpace(); 705 Token::Value SkipSingleLineComment(); 706 Token::Value SkipSourceURLComment(); 707 void TryToParseSourceURLComment(); 708 Token::Value SkipMultiLineComment(); 709 // Scans a possible HTML comment -- begins with '<!'. 710 Token::Value ScanHtmlComment(); 711 712 void ScanDecimalDigits(); 713 Token::Value ScanNumber(bool seen_period); 714 Token::Value ScanIdentifierOrKeyword(); 715 Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped); 716 717 Token::Value ScanString(); 718 719 // Scans an escape-sequence which is part of a string and adds the 720 // decoded character to the current literal. Returns true if a pattern 721 // is scanned. 722 template <bool capture_raw, bool in_template_literal> 723 bool ScanEscape(); 724 725 // Decodes a Unicode escape-sequence which is part of an identifier. 726 // If the escape sequence cannot be decoded the result is kBadChar. 727 uc32 ScanIdentifierUnicodeEscape(); 728 // Helper for the above functions. 729 template <bool capture_raw> 730 uc32 ScanUnicodeEscape(); 731 732 Token::Value ScanTemplateSpan(); 733 734 // Return the current source position. source_pos()735 int source_pos() { 736 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize; 737 } 738 LiteralContainsEscapes(const TokenDesc & token)739 static bool LiteralContainsEscapes(const TokenDesc& token) { 740 Location location = token.location; 741 int source_length = (location.end_pos - location.beg_pos); 742 if (token.token == Token::STRING) { 743 // Subtract delimiters. 744 source_length -= 2; 745 } 746 return token.literal_chars->length() != source_length; 747 } 748 749 UnicodeCache* unicode_cache_; 750 751 // Buffers collecting literal strings, numbers, etc. 752 LiteralBuffer literal_buffer0_; 753 LiteralBuffer literal_buffer1_; 754 LiteralBuffer literal_buffer2_; 755 756 // Values parsed from magic comments. 757 LiteralBuffer source_url_; 758 LiteralBuffer source_mapping_url_; 759 760 // Buffer to store raw string values 761 LiteralBuffer raw_literal_buffer0_; 762 LiteralBuffer raw_literal_buffer1_; 763 LiteralBuffer raw_literal_buffer2_; 764 765 TokenDesc current_; // desc for current token (as returned by Next()) 766 TokenDesc next_; // desc for next token (one token look-ahead) 767 TokenDesc next_next_; // desc for the token after next (after PeakAhead()) 768 769 // Variables for Scanner::BookmarkScope and the *Bookmark implementation. 770 // These variables contain the scanner state when a bookmark is set. 771 // 772 // We will use bookmark_c0_ as a 'control' variable, where: 773 // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_. 774 // - bookmark_c0_ == -1: No bookmark has been set. 775 // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark). 776 // 777 // Which state is being bookmarked? The parser state is distributed over 778 // several variables, roughly like this: 779 // ... 1234 + 5678 ..... [character stream] 780 // [current_] [next_] c0_ | [scanner state] 781 // So when the scanner is logically at the beginning of an expression 782 // like "1234 + 4567", then: 783 // - current_ contains "1234" 784 // - next_ contains "+" 785 // - c0_ contains ' ' (the space between "+" and "5678", 786 // - the source_ character stream points to the beginning of "5678". 787 // To be able to restore this state, we will keep copies of current_, next_, 788 // and c0_; we'll ask the stream to bookmark itself, and we'll copy the 789 // contents of current_'s and next_'s literal buffers to bookmark_*_literal_. 790 static const uc32 kNoBookmark = -1; 791 static const uc32 kBookmarkWasApplied = -2; 792 uc32 bookmark_c0_; 793 TokenDesc bookmark_current_; 794 TokenDesc bookmark_next_; 795 LiteralBuffer bookmark_current_literal_; 796 LiteralBuffer bookmark_current_raw_literal_; 797 LiteralBuffer bookmark_next_literal_; 798 LiteralBuffer bookmark_next_raw_literal_; 799 800 // Input stream. Must be initialized to an Utf16CharacterStream. 801 Utf16CharacterStream* source_; 802 803 // Last-seen positions of potentially problematic tokens. 804 Location octal_pos_; 805 Location decimal_with_leading_zero_pos_; 806 807 // One Unicode character look-ahead; c0_ < 0 at the end of the input. 808 uc32 c0_; 809 810 // Whether there is a line terminator whitespace character after 811 // the current token, and before the next. Does not count newlines 812 // inside multiline comments. 813 bool has_line_terminator_before_next_; 814 // Whether there is a multi-line comment that contains a 815 // line-terminator after the current token, and before the next. 816 bool has_multiline_comment_before_next_; 817 bool has_line_terminator_after_next_; 818 819 // Whether this scanner encountered an HTML comment. 820 bool found_html_comment_; 821 822 bool allow_harmony_exponentiation_operator_; 823 824 MessageTemplate::Template scanner_error_; 825 Location scanner_error_location_; 826 }; 827 828 } // namespace internal 829 } // namespace v8 830 831 #endif // V8_PARSING_SCANNER_H_ 832