• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_ASMJS_ASM_SCANNER_H_
6 #define V8_ASMJS_ASM_SCANNER_H_
7 
8 #include <memory>
9 #include <string>
10 #include <unordered_map>
11 
12 #include "src/asmjs/asm-names.h"
13 #include "src/base/logging.h"
14 #include "src/base/strings.h"
15 #include "src/common/globals.h"
16 
17 namespace v8 {
18 namespace internal {
19 
20 class Utf16CharacterStream;
21 
22 // A custom scanner to extract the token stream needed to parse valid
23 // asm.js: http://asmjs.org/spec/latest/
24 // This scanner intentionally avoids the portion of JavaScript lexing
25 // that are not required to determine if code is valid asm.js code.
26 // * Strings are disallowed except for 'use asm'.
27 // * Only the subset of keywords needed to check asm.js invariants are
28 //   included.
29 // * Identifiers are accumulated into local + global string tables
30 //   (for performance).
31 class V8_EXPORT_PRIVATE AsmJsScanner {
32  public:
33   using token_t = int32_t;
34 
35   explicit AsmJsScanner(Utf16CharacterStream* stream);
36 
37   // Get current token.
Token()38   token_t Token() const { return token_; }
39   // Get position of current token.
Position()40   size_t Position() const { return position_; }
41   // Advance to the next token.
42   void Next();
43   // Back up by one token.
44   void Rewind();
45 
46   // Get raw string for current identifier. Note that the returned string will
47   // become invalid when the scanner advances, create a copy to preserve it.
GetIdentifierString()48   const std::string& GetIdentifierString() const {
49     // Identifier strings don't work after a rewind.
50     DCHECK(!rewind_);
51     return identifier_string_;
52   }
53 
54   // Check if we just passed a newline.
IsPrecededByNewline()55   bool IsPrecededByNewline() const {
56     // Newline tracking doesn't work if you back up.
57     DCHECK(!rewind_);
58     return preceded_by_newline_;
59   }
60 
61 #if DEBUG
62   // Debug only method to go from a token back to its name.
63   // Slow, only use for debugging.
64   std::string Name(token_t token) const;
65 #endif
66 
67   // Restores old position (token after that position). Note that it is not
68   // allowed to rewind right after a seek, because previous tokens are unknown.
69   void Seek(size_t pos);
70 
71   // Select whether identifiers are resolved in global or local scope,
72   // and which scope new identifiers are added to.
EnterLocalScope()73   void EnterLocalScope() { in_local_scope_ = true; }
EnterGlobalScope()74   void EnterGlobalScope() { in_local_scope_ = false; }
75   // Drop all current local identifiers.
76   void ResetLocals();
77 
78   // Methods to check if a token is an identifier and which scope.
IsLocal()79   bool IsLocal() const { return IsLocal(Token()); }
IsGlobal()80   bool IsGlobal() const { return IsGlobal(Token()); }
IsLocal(token_t token)81   static bool IsLocal(token_t token) { return token <= kLocalsStart; }
IsGlobal(token_t token)82   static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
83   // Methods to find the index position of an identifier (count starting from
84   // 0 for each scope separately).
LocalIndex(token_t token)85   static size_t LocalIndex(token_t token) {
86     DCHECK(IsLocal(token));
87     return -(token - kLocalsStart);
88   }
GlobalIndex(token_t token)89   static size_t GlobalIndex(token_t token) {
90     DCHECK(IsGlobal(token));
91     return token - kGlobalsStart;
92   }
93 
94   // Methods to check if the current token is a numeric literal considered an
95   // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
96   // that numbers without a dot outside the [0 .. 2^32) range are errors.
IsUnsigned()97   bool IsUnsigned() const { return Token() == kUnsigned; }
AsUnsigned()98   uint32_t AsUnsigned() const {
99     DCHECK(IsUnsigned());
100     return unsigned_value_;
101   }
IsDouble()102   bool IsDouble() const { return Token() == kDouble; }
AsDouble()103   double AsDouble() const {
104     DCHECK(IsDouble());
105     return double_value_;
106   }
107 
108   // clang-format off
109   enum {
110     // [-10000-kMaxIdentifierCount, -10000)    :: Local identifiers (counting
111     //                                            backwards)
112     // [-10000 .. -1)                          :: Builtin tokens like keywords
113     //                                            (also includes some special
114     //                                             ones like end of input)
115     // 0        .. 255                         :: Single char tokens
116     // 256      .. 256+kMaxIdentifierCount     :: Global identifiers
117     kLocalsStart = -10000,
118 #define V(name, _junk1, _junk2, _junk3) kToken_##name,
119     STDLIB_MATH_FUNCTION_LIST(V)
120     STDLIB_ARRAY_TYPE_LIST(V)
121 #undef V
122 #define V(name, _junk1) kToken_##name,
123     STDLIB_MATH_VALUE_LIST(V)
124 #undef V
125 #define V(name) kToken_##name,
126     STDLIB_OTHER_LIST(V)
127     KEYWORD_NAME_LIST(V)
128 #undef V
129 #define V(rawname, name) kToken_##name,
130     LONG_SYMBOL_NAME_LIST(V)
131 #undef V
132 #define V(name, value, string_name) name = value,
133     SPECIAL_TOKEN_LIST(V)
134 #undef V
135     kGlobalsStart = 256,
136   };
137   // clang-format on
138 
139   static constexpr base::uc32 kEndOfInputU =
140       static_cast<base::uc32>(kEndOfInput);
141 
142  private:
143   Utf16CharacterStream* stream_;
144   token_t token_;
145   token_t preceding_token_;
146   token_t next_token_;         // Only set when in {rewind} state.
147   size_t position_;            // Corresponds to {token} position.
148   size_t preceding_position_;  // Corresponds to {preceding_token} position.
149   size_t next_position_;       // Only set when in {rewind} state.
150   bool rewind_;
151   std::string identifier_string_;
152   bool in_local_scope_;
153   std::unordered_map<std::string, token_t> local_names_;
154   std::unordered_map<std::string, token_t> global_names_;
155   std::unordered_map<std::string, token_t> property_names_;
156   int global_count_;
157   double double_value_;
158   uint32_t unsigned_value_;
159   bool preceded_by_newline_;
160 
161   // Consume multiple characters.
162   void ConsumeIdentifier(base::uc32 ch);
163   void ConsumeNumber(base::uc32 ch);
164   bool ConsumeCComment();
165   void ConsumeCPPComment();
166   void ConsumeString(base::uc32 quote);
167   void ConsumeCompareOrShift(base::uc32 ch);
168 
169   // Classify character categories.
170   bool IsIdentifierStart(base::uc32 ch);
171   bool IsIdentifierPart(base::uc32 ch);
172   bool IsNumberStart(base::uc32 ch);
173 };
174 
175 }  // namespace internal
176 }  // namespace v8
177 
178 #endif  // V8_ASMJS_ASM_SCANNER_H_
179