// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Features shared by parsing and pre-parsing scanners.
6 
7 #ifndef V8_PARSING_SCANNER_H_
8 #define V8_PARSING_SCANNER_H_
9 
10 #include "src/allocation.h"
11 #include "src/base/hashmap.h"
12 #include "src/base/logging.h"
13 #include "src/char-predicates.h"
14 #include "src/collector.h"
15 #include "src/globals.h"
16 #include "src/list.h"
17 #include "src/messages.h"
18 #include "src/parsing/token.h"
19 #include "src/unicode-decoder.h"
20 #include "src/unicode.h"
21 
22 namespace v8 {
23 namespace internal {
24 
25 
26 class AstRawString;
27 class AstValueFactory;
28 class ParserRecorder;
29 class UnicodeCache;
30 
31 
32 // ---------------------------------------------------------------------
33 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
34 // A code unit is a 16 bit value representing either a 16 bit code point
35 // or one part of a surrogate pair that make a single 21 bit code point.
36 
37 class Utf16CharacterStream {
38  public:
Utf16CharacterStream()39   Utf16CharacterStream() : pos_(0) { }
~Utf16CharacterStream()40   virtual ~Utf16CharacterStream() { }
41 
42   // Returns and advances past the next UTF-16 code unit in the input
43   // stream. If there are no more code units, it returns a negative
44   // value.
Advance()45   inline uc32 Advance() {
46     if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
47       pos_++;
48       return static_cast<uc32>(*(buffer_cursor_++));
49     }
50     // Note: currently the following increment is necessary to avoid a
51     // parser problem! The scanner treats the final kEndOfInput as
52     // a code unit with a position, and does math relative to that
53     // position.
54     pos_++;
55 
56     return kEndOfInput;
57   }
58 
59   // Return the current position in the code unit stream.
60   // Starts at zero.
pos()61   inline size_t pos() const { return pos_; }
62 
63   // Skips forward past the next code_unit_count UTF-16 code units
64   // in the input, or until the end of input if that comes sooner.
65   // Returns the number of code units actually skipped. If less
66   // than code_unit_count,
SeekForward(size_t code_unit_count)67   inline size_t SeekForward(size_t code_unit_count) {
68     size_t buffered_chars = buffer_end_ - buffer_cursor_;
69     if (code_unit_count <= buffered_chars) {
70       buffer_cursor_ += code_unit_count;
71       pos_ += code_unit_count;
72       return code_unit_count;
73     }
74     return SlowSeekForward(code_unit_count);
75   }
76 
77   // Pushes back the most recently read UTF-16 code unit (or negative
78   // value if at end of input), i.e., the value returned by the most recent
79   // call to Advance.
80   // Must not be used right after calling SeekForward.
81   virtual void PushBack(int32_t code_unit) = 0;
82 
83   virtual bool SetBookmark();
84   virtual void ResetToBookmark();
85 
86  protected:
87   static const uc32 kEndOfInput = -1;
88 
89   // Ensures that the buffer_cursor_ points to the code_unit at
90   // position pos_ of the input, if possible. If the position
91   // is at or after the end of the input, return false. If there
92   // are more code_units available, return true.
93   virtual bool ReadBlock() = 0;
94   virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
95 
96   const uint16_t* buffer_cursor_;
97   const uint16_t* buffer_end_;
98   size_t pos_;
99 };
100 
101 
102 // ---------------------------------------------------------------------
103 // DuplicateFinder discovers duplicate symbols.
104 
105 class DuplicateFinder {
106  public:
DuplicateFinder(UnicodeCache * constants)107   explicit DuplicateFinder(UnicodeCache* constants)
108       : unicode_constants_(constants),
109         backing_store_(16),
110         map_(&Match) { }
111 
112   int AddOneByteSymbol(Vector<const uint8_t> key, int value);
113   int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
114   // Add a a number literal by converting it (if necessary)
115   // to the string that ToString(ToNumber(literal)) would generate.
116   // and then adding that string with AddOneByteSymbol.
117   // This string is the actual value used as key in an object literal,
118   // and the one that must be different from the other keys.
119   int AddNumber(Vector<const uint8_t> key, int value);
120 
121  private:
122   int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
123   // Backs up the key and its length in the backing store.
124   // The backup is stored with a base 127 encoding of the
125   // length (plus a bit saying whether the string is one byte),
126   // followed by the bytes of the key.
127   uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
128 
129   // Compare two encoded keys (both pointing into the backing store)
130   // for having the same base-127 encoded lengths and representation.
131   // and then having the same 'length' bytes following.
132   static bool Match(void* first, void* second);
133   // Creates a hash from a sequence of bytes.
134   static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
135   // Checks whether a string containing a JS number is its canonical
136   // form.
137   static bool IsNumberCanonical(Vector<const uint8_t> key);
138 
139   // Size of buffer. Sufficient for using it to call DoubleToCString in
140   // from conversions.h.
141   static const int kBufferSize = 100;
142 
143   UnicodeCache* unicode_constants_;
144   // Backing store used to store strings used as hashmap keys.
145   SequenceCollector<unsigned char> backing_store_;
146   base::HashMap map_;
147   // Buffer used for string->number->canonical string conversions.
148   char number_buffer_[kBufferSize];
149 };
150 
// ----------------------------------------------------------------------------
// LiteralBuffer - Collector of chars of literals.
153 
// Largest code unit value that LiteralBuffer::AddChar(char) accepts.
const int kMaxAscii = 127;
155 
156 class LiteralBuffer {
157  public:
LiteralBuffer()158   LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
159 
~LiteralBuffer()160   ~LiteralBuffer() { backing_store_.Dispose(); }
161 
INLINE(void AddChar (char code_unit))162   INLINE(void AddChar(char code_unit)) {
163     if (position_ >= backing_store_.length()) ExpandBuffer();
164     DCHECK(is_one_byte_);
165     DCHECK(0 <= code_unit && code_unit <= kMaxAscii);
166     backing_store_[position_] = static_cast<byte>(code_unit);
167     position_ += kOneByteSize;
168     return;
169   }
170 
INLINE(void AddChar (uc32 code_unit))171   INLINE(void AddChar(uc32 code_unit)) {
172     if (position_ >= backing_store_.length()) ExpandBuffer();
173     if (is_one_byte_) {
174       if (code_unit <= unibrow::Latin1::kMaxChar) {
175         backing_store_[position_] = static_cast<byte>(code_unit);
176         position_ += kOneByteSize;
177         return;
178       }
179       ConvertToTwoByte();
180     }
181     if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
182       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
183       position_ += kUC16Size;
184     } else {
185       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
186           unibrow::Utf16::LeadSurrogate(code_unit);
187       position_ += kUC16Size;
188       if (position_ >= backing_store_.length()) ExpandBuffer();
189       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
190           unibrow::Utf16::TrailSurrogate(code_unit);
191       position_ += kUC16Size;
192     }
193   }
194 
is_one_byte()195   bool is_one_byte() const { return is_one_byte_; }
196 
is_contextual_keyword(Vector<const char> keyword)197   bool is_contextual_keyword(Vector<const char> keyword) const {
198     return is_one_byte() && keyword.length() == position_ &&
199         (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
200   }
201 
two_byte_literal()202   Vector<const uint16_t> two_byte_literal() const {
203     DCHECK(!is_one_byte_);
204     DCHECK((position_ & 0x1) == 0);
205     return Vector<const uint16_t>(
206         reinterpret_cast<const uint16_t*>(backing_store_.start()),
207         position_ >> 1);
208   }
209 
one_byte_literal()210   Vector<const uint8_t> one_byte_literal() const {
211     DCHECK(is_one_byte_);
212     return Vector<const uint8_t>(
213         reinterpret_cast<const uint8_t*>(backing_store_.start()),
214         position_);
215   }
216 
length()217   int length() const {
218     return is_one_byte_ ? position_ : (position_ >> 1);
219   }
220 
ReduceLength(int delta)221   void ReduceLength(int delta) {
222     position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
223   }
224 
Reset()225   void Reset() {
226     position_ = 0;
227     is_one_byte_ = true;
228   }
229 
230   Handle<String> Internalize(Isolate* isolate) const;
231 
CopyFrom(const LiteralBuffer * other)232   void CopyFrom(const LiteralBuffer* other) {
233     if (other == nullptr) {
234       Reset();
235     } else {
236       is_one_byte_ = other->is_one_byte_;
237       position_ = other->position_;
238       if (position_ < backing_store_.length()) {
239         std::copy(other->backing_store_.begin(),
240                   other->backing_store_.begin() + position_,
241                   backing_store_.begin());
242       } else {
243         backing_store_.Dispose();
244         backing_store_ = other->backing_store_.Clone();
245       }
246     }
247   }
248 
249  private:
250   static const int kInitialCapacity = 16;
251   static const int kGrowthFactory = 4;
252   static const int kMinConversionSlack = 256;
253   static const int kMaxGrowth = 1 * MB;
NewCapacity(int min_capacity)254   inline int NewCapacity(int min_capacity) {
255     int capacity = Max(min_capacity, backing_store_.length());
256     int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
257     return new_capacity;
258   }
259 
ExpandBuffer()260   void ExpandBuffer() {
261     Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
262     MemCopy(new_store.start(), backing_store_.start(), position_);
263     backing_store_.Dispose();
264     backing_store_ = new_store;
265   }
266 
ConvertToTwoByte()267   void ConvertToTwoByte() {
268     DCHECK(is_one_byte_);
269     Vector<byte> new_store;
270     int new_content_size = position_ * kUC16Size;
271     if (new_content_size >= backing_store_.length()) {
272       // Ensure room for all currently read code units as UC16 as well
273       // as the code unit about to be stored.
274       new_store = Vector<byte>::New(NewCapacity(new_content_size));
275     } else {
276       new_store = backing_store_;
277     }
278     uint8_t* src = backing_store_.start();
279     uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
280     for (int i = position_ - 1; i >= 0; i--) {
281       dst[i] = src[i];
282     }
283     if (new_store.start() != backing_store_.start()) {
284       backing_store_.Dispose();
285       backing_store_ = new_store;
286     }
287     position_ = new_content_size;
288     is_one_byte_ = false;
289   }
290 
291   bool is_one_byte_;
292   int position_;
293   Vector<byte> backing_store_;
294 
295   DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
296 };
297 
298 
299 // ----------------------------------------------------------------------------
300 // JavaScript Scanner.
301 
302 class Scanner {
303  public:
304   // Scoped helper for literal recording. Automatically drops the literal
305   // if aborting the scanning before it's complete.
306   class LiteralScope {
307    public:
LiteralScope(Scanner * self)308     explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
309       scanner_->StartLiteral();
310     }
~LiteralScope()311      ~LiteralScope() {
312        if (!complete_) scanner_->DropLiteral();
313      }
Complete()314     void Complete() {
315       complete_ = true;
316     }
317 
318    private:
319     Scanner* scanner_;
320     bool complete_;
321   };
322 
323   // Scoped helper for a re-settable bookmark.
324   class BookmarkScope {
325    public:
BookmarkScope(Scanner * scanner)326     explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) {
327       DCHECK_NOT_NULL(scanner_);
328     }
~BookmarkScope()329     ~BookmarkScope() { scanner_->DropBookmark(); }
330 
Set()331     bool Set() { return scanner_->SetBookmark(); }
Reset()332     void Reset() { scanner_->ResetToBookmark(); }
HasBeenSet()333     bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); }
HasBeenReset()334     bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); }
335 
336    private:
337     Scanner* scanner_;
338 
339     DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
340   };
341 
342   // Representation of an interval of source positions.
343   struct Location {
LocationLocation344     Location(int b, int e) : beg_pos(b), end_pos(e) { }
LocationLocation345     Location() : beg_pos(0), end_pos(0) { }
346 
IsValidLocation347     bool IsValid() const {
348       return beg_pos >= 0 && end_pos >= beg_pos;
349     }
350 
invalidLocation351     static Location invalid() { return Location(-1, -1); }
352 
353     int beg_pos;
354     int end_pos;
355   };
356 
357   // -1 is outside of the range of any real source code.
358   static const int kNoOctalLocation = -1;
359 
360   explicit Scanner(UnicodeCache* scanner_contants);
361 
362   void Initialize(Utf16CharacterStream* source);
363 
364   // Returns the next token and advances input.
365   Token::Value Next();
366   // Returns the token following peek()
367   Token::Value PeekAhead();
368   // Returns the current token again.
current_token()369   Token::Value current_token() { return current_.token; }
370   // Returns the location information for the current token
371   // (the token last returned by Next()).
location()372   Location location() const { return current_.location; }
373 
has_error()374   bool has_error() const { return scanner_error_ != MessageTemplate::kNone; }
error()375   MessageTemplate::Template error() const { return scanner_error_; }
error_location()376   Location error_location() const { return scanner_error_location_; }
377 
378   // Similar functions for the upcoming token.
379 
380   // One token look-ahead (past the token returned by Next()).
peek()381   Token::Value peek() const { return next_.token; }
382 
peek_location()383   Location peek_location() const { return next_.location; }
384 
literal_contains_escapes()385   bool literal_contains_escapes() const {
386     return LiteralContainsEscapes(current_);
387   }
next_literal_contains_escapes()388   bool next_literal_contains_escapes() const {
389     return LiteralContainsEscapes(next_);
390   }
is_literal_contextual_keyword(Vector<const char> keyword)391   bool is_literal_contextual_keyword(Vector<const char> keyword) {
392     DCHECK_NOT_NULL(current_.literal_chars);
393     return current_.literal_chars->is_contextual_keyword(keyword);
394   }
is_next_contextual_keyword(Vector<const char> keyword)395   bool is_next_contextual_keyword(Vector<const char> keyword) {
396     DCHECK_NOT_NULL(next_.literal_chars);
397     return next_.literal_chars->is_contextual_keyword(keyword);
398   }
399 
400   const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
401   const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
402   const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
403 
404   double DoubleValue();
405   bool ContainsDot();
406   bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
407     if (is_literal_one_byte() &&
408         literal_length() == length &&
409         (allow_escapes || !literal_contains_escapes())) {
410       const char* token =
411           reinterpret_cast<const char*>(literal_one_byte_string().start());
412       return !strncmp(token, data, length);
413     }
414     return false;
415   }
UnescapedLiteralMatches(const char * data,int length)416   inline bool UnescapedLiteralMatches(const char* data, int length) {
417     return LiteralMatches(data, length, false);
418   }
419 
IsGetOrSet(bool * is_get,bool * is_set)420   void IsGetOrSet(bool* is_get, bool* is_set) {
421     if (is_literal_one_byte() &&
422         literal_length() == 3 &&
423         !literal_contains_escapes()) {
424       const char* token =
425           reinterpret_cast<const char*>(literal_one_byte_string().start());
426       *is_get = strncmp(token, "get", 3) == 0;
427       *is_set = !*is_get && strncmp(token, "set", 3) == 0;
428     }
429   }
430 
431   int FindSymbol(DuplicateFinder* finder, int value);
432 
unicode_cache()433   UnicodeCache* unicode_cache() { return unicode_cache_; }
434 
435   // Returns the location of the last seen octal literal.
octal_position()436   Location octal_position() const { return octal_pos_; }
clear_octal_position()437   void clear_octal_position() { octal_pos_ = Location::invalid(); }
438   // Returns the location of the last seen decimal literal with a leading zero.
decimal_with_leading_zero_position()439   Location decimal_with_leading_zero_position() const {
440     return decimal_with_leading_zero_pos_;
441   }
clear_decimal_with_leading_zero_position()442   void clear_decimal_with_leading_zero_position() {
443     decimal_with_leading_zero_pos_ = Location::invalid();
444   }
445 
446   // Returns the value of the last smi that was scanned.
smi_value()447   int smi_value() const { return current_.smi_value_; }
448 
449   // Seek forward to the given position.  This operation does not
450   // work in general, for instance when there are pushed back
451   // characters, but works for seeking forward until simple delimiter
452   // tokens, which is what it is used for.
453   void SeekForward(int pos);
454 
455   // Returns true if there was a line terminator before the peek'ed token,
456   // possibly inside a multi-line comment.
HasAnyLineTerminatorBeforeNext()457   bool HasAnyLineTerminatorBeforeNext() const {
458     return has_line_terminator_before_next_ ||
459            has_multiline_comment_before_next_;
460   }
461 
HasAnyLineTerminatorAfterNext()462   bool HasAnyLineTerminatorAfterNext() {
463     Token::Value ensure_next_next = PeekAhead();
464     USE(ensure_next_next);
465     return has_line_terminator_after_next_;
466   }
467 
468   // Scans the input as a regular expression pattern, previous
469   // character(s) must be /(=). Returns true if a pattern is scanned.
470   bool ScanRegExpPattern(bool seen_equal);
471   // Scans the input as regular expression flags. Returns the flags on success.
472   Maybe<RegExp::Flags> ScanRegExpFlags();
473 
474   // Scans the input as a template literal
475   Token::Value ScanTemplateStart();
476   Token::Value ScanTemplateContinuation();
477 
source_url()478   const LiteralBuffer* source_url() const { return &source_url_; }
source_mapping_url()479   const LiteralBuffer* source_mapping_url() const {
480     return &source_mapping_url_;
481   }
482 
483   bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;
484 
FoundHtmlComment()485   bool FoundHtmlComment() const { return found_html_comment_; }
486 
487 #define DECLARE_ACCESSORS(name)                                \
488   inline bool allow_##name() const { return allow_##name##_; } \
489   inline void set_allow_##name(bool allow) { allow_##name##_ = allow; }
490   DECLARE_ACCESSORS(harmony_exponentiation_operator)
491 #undef ACCESSOR
492 
493  private:
494   // The current and look-ahead token.
495   struct TokenDesc {
496     Token::Value token;
497     Location location;
498     LiteralBuffer* literal_chars;
499     LiteralBuffer* raw_literal_chars;
500     int smi_value_;
501   };
502 
503   static const int kCharacterLookaheadBufferSize = 1;
504 
505   // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
506   template <bool capture_raw>
507   uc32 ScanOctalEscape(uc32 c, int length);
508 
509   // Call this after setting source_ to the input.
Init()510   void Init() {
511     // Set c0_ (one character ahead)
512     STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
513     Advance();
514     // Initialize current_ to not refer to a literal.
515     current_.literal_chars = NULL;
516     current_.raw_literal_chars = NULL;
517     next_next_.token = Token::UNINITIALIZED;
518     found_html_comment_ = false;
519     scanner_error_ = MessageTemplate::kNone;
520   }
521 
522   // Support BookmarkScope functionality.
523   bool SetBookmark();
524   void ResetToBookmark();
525   bool BookmarkHasBeenSet();
526   bool BookmarkHasBeenReset();
527   void DropBookmark();
528   static void CopyTokenDesc(TokenDesc* to, TokenDesc* from);
529 
ReportScannerError(const Location & location,MessageTemplate::Template error)530   void ReportScannerError(const Location& location,
531                           MessageTemplate::Template error) {
532     if (has_error()) return;
533     scanner_error_ = error;
534     scanner_error_location_ = location;
535   }
536 
ReportScannerError(int pos,MessageTemplate::Template error)537   void ReportScannerError(int pos, MessageTemplate::Template error) {
538     if (has_error()) return;
539     scanner_error_ = error;
540     scanner_error_location_ = Location(pos, pos + 1);
541   }
542 
543   // Literal buffer support
StartLiteral()544   inline void StartLiteral() {
545     LiteralBuffer* free_buffer =
546         (current_.literal_chars == &literal_buffer0_)
547             ? &literal_buffer1_
548             : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_
549                                                             : &literal_buffer0_;
550     free_buffer->Reset();
551     next_.literal_chars = free_buffer;
552   }
553 
StartRawLiteral()554   inline void StartRawLiteral() {
555     LiteralBuffer* free_buffer =
556         (current_.raw_literal_chars == &raw_literal_buffer0_)
557             ? &raw_literal_buffer1_
558             : (current_.raw_literal_chars == &raw_literal_buffer1_)
559                   ? &raw_literal_buffer2_
560                   : &raw_literal_buffer0_;
561     free_buffer->Reset();
562     next_.raw_literal_chars = free_buffer;
563   }
564 
INLINE(void AddLiteralChar (uc32 c))565   INLINE(void AddLiteralChar(uc32 c)) {
566     DCHECK_NOT_NULL(next_.literal_chars);
567     next_.literal_chars->AddChar(c);
568   }
569 
INLINE(void AddLiteralChar (char c))570   INLINE(void AddLiteralChar(char c)) {
571     DCHECK_NOT_NULL(next_.literal_chars);
572     next_.literal_chars->AddChar(c);
573   }
574 
INLINE(void AddRawLiteralChar (uc32 c))575   INLINE(void AddRawLiteralChar(uc32 c)) {
576     DCHECK_NOT_NULL(next_.raw_literal_chars);
577     next_.raw_literal_chars->AddChar(c);
578   }
579 
INLINE(void ReduceRawLiteralLength (int delta))580   INLINE(void ReduceRawLiteralLength(int delta)) {
581     DCHECK_NOT_NULL(next_.raw_literal_chars);
582     next_.raw_literal_chars->ReduceLength(delta);
583   }
584 
585   // Stops scanning of a literal and drop the collected characters,
586   // e.g., due to an encountered error.
DropLiteral()587   inline void DropLiteral() {
588     next_.literal_chars = NULL;
589     next_.raw_literal_chars = NULL;
590   }
591 
AddLiteralCharAdvance()592   inline void AddLiteralCharAdvance() {
593     AddLiteralChar(c0_);
594     Advance();
595   }
596 
597   // Low-level scanning support.
598   template <bool capture_raw = false, bool check_surrogate = true>
Advance()599   void Advance() {
600     if (capture_raw) {
601       AddRawLiteralChar(c0_);
602     }
603     c0_ = source_->Advance();
604     if (check_surrogate) HandleLeadSurrogate();
605   }
606 
HandleLeadSurrogate()607   void HandleLeadSurrogate() {
608     if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
609       uc32 c1 = source_->Advance();
610       if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
611         source_->PushBack(c1);
612       } else {
613         c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
614       }
615     }
616   }
617 
PushBack(uc32 ch)618   void PushBack(uc32 ch) {
619     if (c0_ > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
620       source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
621       source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
622     } else {
623       source_->PushBack(c0_);
624     }
625     c0_ = ch;
626   }
627 
Select(Token::Value tok)628   inline Token::Value Select(Token::Value tok) {
629     Advance();
630     return tok;
631   }
632 
Select(uc32 next,Token::Value then,Token::Value else_)633   inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
634     Advance();
635     if (c0_ == next) {
636       Advance();
637       return then;
638     } else {
639       return else_;
640     }
641   }
642 
643   // Returns the literal string, if any, for the current token (the
644   // token last returned by Next()). The string is 0-terminated.
645   // Literal strings are collected for identifiers, strings, numbers as well
646   // as for template literals. For template literals we also collect the raw
647   // form.
648   // These functions only give the correct result if the literal was scanned
649   // when a LiteralScope object is alive.
literal_one_byte_string()650   Vector<const uint8_t> literal_one_byte_string() {
651     DCHECK_NOT_NULL(current_.literal_chars);
652     return current_.literal_chars->one_byte_literal();
653   }
literal_two_byte_string()654   Vector<const uint16_t> literal_two_byte_string() {
655     DCHECK_NOT_NULL(current_.literal_chars);
656     return current_.literal_chars->two_byte_literal();
657   }
is_literal_one_byte()658   bool is_literal_one_byte() {
659     DCHECK_NOT_NULL(current_.literal_chars);
660     return current_.literal_chars->is_one_byte();
661   }
literal_length()662   int literal_length() const {
663     DCHECK_NOT_NULL(current_.literal_chars);
664     return current_.literal_chars->length();
665   }
666   // Returns the literal string for the next token (the token that
667   // would be returned if Next() were called).
next_literal_one_byte_string()668   Vector<const uint8_t> next_literal_one_byte_string() {
669     DCHECK_NOT_NULL(next_.literal_chars);
670     return next_.literal_chars->one_byte_literal();
671   }
next_literal_two_byte_string()672   Vector<const uint16_t> next_literal_two_byte_string() {
673     DCHECK_NOT_NULL(next_.literal_chars);
674     return next_.literal_chars->two_byte_literal();
675   }
is_next_literal_one_byte()676   bool is_next_literal_one_byte() {
677     DCHECK_NOT_NULL(next_.literal_chars);
678     return next_.literal_chars->is_one_byte();
679   }
raw_literal_one_byte_string()680   Vector<const uint8_t> raw_literal_one_byte_string() {
681     DCHECK_NOT_NULL(current_.raw_literal_chars);
682     return current_.raw_literal_chars->one_byte_literal();
683   }
raw_literal_two_byte_string()684   Vector<const uint16_t> raw_literal_two_byte_string() {
685     DCHECK_NOT_NULL(current_.raw_literal_chars);
686     return current_.raw_literal_chars->two_byte_literal();
687   }
is_raw_literal_one_byte()688   bool is_raw_literal_one_byte() {
689     DCHECK_NOT_NULL(current_.raw_literal_chars);
690     return current_.raw_literal_chars->is_one_byte();
691   }
692 
693   template <bool capture_raw, bool unicode = false>
694   uc32 ScanHexNumber(int expected_length);
695   // Scan a number of any length but not bigger than max_value. For example, the
696   // number can be 000000001, so it's very long in characters but its value is
697   // small.
698   template <bool capture_raw>
699   uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos);
700 
701   // Scans a single JavaScript token.
702   void Scan();
703 
704   bool SkipWhiteSpace();
705   Token::Value SkipSingleLineComment();
706   Token::Value SkipSourceURLComment();
707   void TryToParseSourceURLComment();
708   Token::Value SkipMultiLineComment();
709   // Scans a possible HTML comment -- begins with '<!'.
710   Token::Value ScanHtmlComment();
711 
712   void ScanDecimalDigits();
713   Token::Value ScanNumber(bool seen_period);
714   Token::Value ScanIdentifierOrKeyword();
715   Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped);
716 
717   Token::Value ScanString();
718 
719   // Scans an escape-sequence which is part of a string and adds the
720   // decoded character to the current literal. Returns true if a pattern
721   // is scanned.
722   template <bool capture_raw, bool in_template_literal>
723   bool ScanEscape();
724 
725   // Decodes a Unicode escape-sequence which is part of an identifier.
726   // If the escape sequence cannot be decoded the result is kBadChar.
727   uc32 ScanIdentifierUnicodeEscape();
728   // Helper for the above functions.
729   template <bool capture_raw>
730   uc32 ScanUnicodeEscape();
731 
732   Token::Value ScanTemplateSpan();
733 
734   // Return the current source position.
source_pos()735   int source_pos() {
736     return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
737   }
738 
LiteralContainsEscapes(const TokenDesc & token)739   static bool LiteralContainsEscapes(const TokenDesc& token) {
740     Location location = token.location;
741     int source_length = (location.end_pos - location.beg_pos);
742     if (token.token == Token::STRING) {
743       // Subtract delimiters.
744       source_length -= 2;
745     }
746     return token.literal_chars->length() != source_length;
747   }
748 
749   UnicodeCache* unicode_cache_;
750 
751   // Buffers collecting literal strings, numbers, etc.
752   LiteralBuffer literal_buffer0_;
753   LiteralBuffer literal_buffer1_;
754   LiteralBuffer literal_buffer2_;
755 
756   // Values parsed from magic comments.
757   LiteralBuffer source_url_;
758   LiteralBuffer source_mapping_url_;
759 
760   // Buffer to store raw string values
761   LiteralBuffer raw_literal_buffer0_;
762   LiteralBuffer raw_literal_buffer1_;
763   LiteralBuffer raw_literal_buffer2_;
764 
765   TokenDesc current_;    // desc for current token (as returned by Next())
766   TokenDesc next_;       // desc for next token (one token look-ahead)
767   TokenDesc next_next_;  // desc for the token after next (after PeakAhead())
768 
769   // Variables for Scanner::BookmarkScope and the *Bookmark implementation.
770   // These variables contain the scanner state when a bookmark is set.
771   //
772   // We will use bookmark_c0_ as a 'control' variable, where:
773   // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_.
774   // - bookmark_c0_ == -1: No bookmark has been set.
775   // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark).
776   //
777   // Which state is being bookmarked? The parser state is distributed over
778   // several variables, roughly like this:
779   //   ...    1234        +       5678 ..... [character stream]
780   //       [current_] [next_] c0_ |      [scanner state]
781   // So when the scanner is logically at the beginning of an expression
782   // like "1234 + 4567", then:
783   // - current_ contains "1234"
784   // - next_ contains "+"
785   // - c0_ contains ' ' (the space between "+" and "5678",
786   // - the source_ character stream points to the beginning of "5678".
787   // To be able to restore this state, we will keep copies of current_, next_,
788   // and c0_; we'll ask the stream to bookmark itself, and we'll copy the
789   // contents of current_'s and next_'s literal buffers to bookmark_*_literal_.
790   static const uc32 kNoBookmark = -1;
791   static const uc32 kBookmarkWasApplied = -2;
792   uc32 bookmark_c0_;
793   TokenDesc bookmark_current_;
794   TokenDesc bookmark_next_;
795   LiteralBuffer bookmark_current_literal_;
796   LiteralBuffer bookmark_current_raw_literal_;
797   LiteralBuffer bookmark_next_literal_;
798   LiteralBuffer bookmark_next_raw_literal_;
799 
800   // Input stream. Must be initialized to an Utf16CharacterStream.
801   Utf16CharacterStream* source_;
802 
803   // Last-seen positions of potentially problematic tokens.
804   Location octal_pos_;
805   Location decimal_with_leading_zero_pos_;
806 
807   // One Unicode character look-ahead; c0_ < 0 at the end of the input.
808   uc32 c0_;
809 
810   // Whether there is a line terminator whitespace character after
811   // the current token, and  before the next. Does not count newlines
812   // inside multiline comments.
813   bool has_line_terminator_before_next_;
814   // Whether there is a multi-line comment that contains a
815   // line-terminator after the current token, and before the next.
816   bool has_multiline_comment_before_next_;
817   bool has_line_terminator_after_next_;
818 
819   // Whether this scanner encountered an HTML comment.
820   bool found_html_comment_;
821 
822   bool allow_harmony_exponentiation_operator_;
823 
824   MessageTemplate::Template scanner_error_;
825   Location scanner_error_location_;
826 };
827 
828 }  // namespace internal
829 }  // namespace v8
830 
831 #endif  // V8_PARSING_SCANNER_H_
832