• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_ASMJS_ASM_SCANNER_H_
6 #define V8_ASMJS_ASM_SCANNER_H_
7 
8 #include <memory>
9 #include <string>
10 #include <unordered_map>
11 
12 #include "src/asmjs/asm-names.h"
13 #include "src/base/logging.h"
14 #include "src/common/globals.h"
15 
16 namespace v8 {
17 namespace internal {
18 
19 class Utf16CharacterStream;
20 
21 // A custom scanner to extract the token stream needed to parse valid
22 // asm.js: http://asmjs.org/spec/latest/
23 // This scanner intentionally avoids the portion of JavaScript lexing
24 // that are not required to determine if code is valid asm.js code.
25 // * Strings are disallowed except for 'use asm'.
26 // * Only the subset of keywords needed to check asm.js invariants are
27 //   included.
28 // * Identifiers are accumulated into local + global string tables
29 //   (for performance).
30 class V8_EXPORT_PRIVATE AsmJsScanner {
31  public:
32   using token_t = int32_t;
33 
34   explicit AsmJsScanner(Utf16CharacterStream* stream);
35 
36   // Get current token.
Token()37   token_t Token() const { return token_; }
38   // Get position of current token.
Position()39   size_t Position() const { return position_; }
40   // Advance to the next token.
41   void Next();
42   // Back up by one token.
43   void Rewind();
44 
45   // Get raw string for current identifier. Note that the returned string will
46   // become invalid when the scanner advances, create a copy to preserve it.
GetIdentifierString()47   const std::string& GetIdentifierString() const {
48     // Identifier strings don't work after a rewind.
49     DCHECK(!rewind_);
50     return identifier_string_;
51   }
52 
53   // Check if we just passed a newline.
IsPrecededByNewline()54   bool IsPrecededByNewline() const {
55     // Newline tracking doesn't work if you back up.
56     DCHECK(!rewind_);
57     return preceded_by_newline_;
58   }
59 
60 #if DEBUG
61   // Debug only method to go from a token back to its name.
62   // Slow, only use for debugging.
63   std::string Name(token_t token) const;
64 #endif
65 
66   // Restores old position (token after that position). Note that it is not
67   // allowed to rewind right after a seek, because previous tokens are unknown.
68   void Seek(size_t pos);
69 
70   // Select whether identifiers are resolved in global or local scope,
71   // and which scope new identifiers are added to.
EnterLocalScope()72   void EnterLocalScope() { in_local_scope_ = true; }
EnterGlobalScope()73   void EnterGlobalScope() { in_local_scope_ = false; }
74   // Drop all current local identifiers.
75   void ResetLocals();
76 
77   // Methods to check if a token is an identifier and which scope.
IsLocal()78   bool IsLocal() const { return IsLocal(Token()); }
IsGlobal()79   bool IsGlobal() const { return IsGlobal(Token()); }
IsLocal(token_t token)80   static bool IsLocal(token_t token) { return token <= kLocalsStart; }
IsGlobal(token_t token)81   static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
82   // Methods to find the index position of an identifier (count starting from
83   // 0 for each scope separately).
LocalIndex(token_t token)84   static size_t LocalIndex(token_t token) {
85     DCHECK(IsLocal(token));
86     return -(token - kLocalsStart);
87   }
GlobalIndex(token_t token)88   static size_t GlobalIndex(token_t token) {
89     DCHECK(IsGlobal(token));
90     return token - kGlobalsStart;
91   }
92 
93   // Methods to check if the current token is a numeric literal considered an
94   // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
95   // that numbers without a dot outside the [0 .. 2^32) range are errors.
IsUnsigned()96   bool IsUnsigned() const { return Token() == kUnsigned; }
AsUnsigned()97   uint32_t AsUnsigned() const {
98     DCHECK(IsUnsigned());
99     return unsigned_value_;
100   }
IsDouble()101   bool IsDouble() const { return Token() == kDouble; }
AsDouble()102   double AsDouble() const {
103     DCHECK(IsDouble());
104     return double_value_;
105   }
106 
107   // clang-format off
108   enum {
109     // [-10000-kMaxIdentifierCount, -10000)    :: Local identifiers (counting
110     //                                            backwards)
111     // [-10000 .. -1)                          :: Builtin tokens like keywords
112     //                                            (also includes some special
113     //                                             ones like end of input)
114     // 0        .. 255                         :: Single char tokens
115     // 256      .. 256+kMaxIdentifierCount     :: Global identifiers
116     kLocalsStart = -10000,
117 #define V(name, _junk1, _junk2, _junk3) kToken_##name,
118     STDLIB_MATH_FUNCTION_LIST(V)
119     STDLIB_ARRAY_TYPE_LIST(V)
120 #undef V
121 #define V(name, _junk1) kToken_##name,
122     STDLIB_MATH_VALUE_LIST(V)
123 #undef V
124 #define V(name) kToken_##name,
125     STDLIB_OTHER_LIST(V)
126     KEYWORD_NAME_LIST(V)
127 #undef V
128 #define V(rawname, name) kToken_##name,
129     LONG_SYMBOL_NAME_LIST(V)
130 #undef V
131 #define V(name, value, string_name) name = value,
132     SPECIAL_TOKEN_LIST(V)
133 #undef V
134     kGlobalsStart = 256,
135   };
136   // clang-format on
137 
138   static constexpr uc32 kEndOfInputU = static_cast<uc32>(kEndOfInput);
139 
140  private:
141   Utf16CharacterStream* stream_;
142   token_t token_;
143   token_t preceding_token_;
144   token_t next_token_;         // Only set when in {rewind} state.
145   size_t position_;            // Corresponds to {token} position.
146   size_t preceding_position_;  // Corresponds to {preceding_token} position.
147   size_t next_position_;       // Only set when in {rewind} state.
148   bool rewind_;
149   std::string identifier_string_;
150   bool in_local_scope_;
151   std::unordered_map<std::string, token_t> local_names_;
152   std::unordered_map<std::string, token_t> global_names_;
153   std::unordered_map<std::string, token_t> property_names_;
154   int global_count_;
155   double double_value_;
156   uint32_t unsigned_value_;
157   bool preceded_by_newline_;
158 
159   // Consume multiple characters.
160   void ConsumeIdentifier(uc32 ch);
161   void ConsumeNumber(uc32 ch);
162   bool ConsumeCComment();
163   void ConsumeCPPComment();
164   void ConsumeString(uc32 quote);
165   void ConsumeCompareOrShift(uc32 ch);
166 
167   // Classify character categories.
168   bool IsIdentifierStart(uc32 ch);
169   bool IsIdentifierPart(uc32 ch);
170   bool IsNumberStart(uc32 ch);
171 };
172 
173 }  // namespace internal
174 }  // namespace v8
175 
176 #endif  // V8_ASMJS_ASM_SCANNER_H_
177