• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/asmjs/asm-scanner.h"
6 
7 #include <cinttypes>
8 
9 #include "src/flags/flags.h"
10 #include "src/numbers/conversions.h"
11 #include "src/parsing/scanner.h"
12 #include "src/strings/char-predicates-inl.h"
13 
14 namespace v8 {
15 namespace internal {
16 
namespace {
// Upper bound on the number of identifiers the scanner will hand out, chosen
// so that both global and local identifiers can be given a token id that
// fits in the range of an int32_t.
constexpr int kMaxIdentifierCount = 0xF000000;
}  // namespace
22 
AsmJsScanner(Utf16CharacterStream * stream)23 AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream)
24     : stream_(stream),
25       token_(kUninitialized),
26       preceding_token_(kUninitialized),
27       next_token_(kUninitialized),
28       position_(0),
29       preceding_position_(0),
30       next_position_(0),
31       rewind_(false),
32       in_local_scope_(false),
33       global_count_(0),
34       double_value_(0.0),
35       unsigned_value_(0),
36       preceded_by_newline_(false) {
37 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
38   STDLIB_MATH_FUNCTION_LIST(V)
39   STDLIB_ARRAY_TYPE_LIST(V)
40 #undef V
41 #define V(name, _junk1) property_names_[#name] = kToken_##name;
42   STDLIB_MATH_VALUE_LIST(V)
43 #undef V
44 #define V(name) property_names_[#name] = kToken_##name;
45   STDLIB_OTHER_LIST(V)
46 #undef V
47 #define V(name) global_names_[#name] = kToken_##name;
48   KEYWORD_NAME_LIST(V)
49 #undef V
50   Next();
51 }
52 
Next()53 void AsmJsScanner::Next() {
54   if (rewind_) {
55     preceding_token_ = token_;
56     preceding_position_ = position_;
57     token_ = next_token_;
58     position_ = next_position_;
59     next_token_ = kUninitialized;
60     next_position_ = 0;
61     rewind_ = false;
62     return;
63   }
64 
65   if (token_ == kEndOfInput || token_ == kParseError) {
66     return;
67   }
68 
69 #if DEBUG
70   if (FLAG_trace_asm_scanner) {
71     if (Token() == kDouble) {
72       PrintF("%lf ", AsDouble());
73     } else if (Token() == kUnsigned) {
74       PrintF("%" PRIu32 " ", AsUnsigned());
75     } else {
76       std::string name = Name(Token());
77       PrintF("%s ", name.c_str());
78     }
79   }
80 #endif
81 
82   preceded_by_newline_ = false;
83   preceding_token_ = token_;
84   preceding_position_ = position_;
85 
86   for (;;) {
87     position_ = stream_->pos();
88     base::uc32 ch = stream_->Advance();
89     switch (ch) {
90       case ' ':
91       case '\t':
92       case '\r':
93         // Ignore whitespace.
94         break;
95 
96       case '\n':
97         // Track when we've passed a newline for optional semicolon support,
98         // but keep scanning.
99         preceded_by_newline_ = true;
100         break;
101 
102       case kEndOfInputU:
103         token_ = kEndOfInput;
104         return;
105 
106       case '\'':
107       case '"':
108         ConsumeString(ch);
109         return;
110 
111       case '/':
112         ch = stream_->Advance();
113         if (ch == '/') {
114           ConsumeCPPComment();
115         } else if (ch == '*') {
116           if (!ConsumeCComment()) {
117             token_ = kParseError;
118             return;
119           }
120         } else {
121           stream_->Back();
122           token_ = '/';
123           return;
124         }
125         // Breaks out of switch, but loops again (i.e. the case when we parsed
126         // a comment, but need to continue to look for the next token).
127         break;
128 
129       case '<':
130       case '>':
131       case '=':
132       case '!':
133         ConsumeCompareOrShift(ch);
134         return;
135 
136 #define V(single_char_token) case single_char_token:
137         SIMPLE_SINGLE_TOKEN_LIST(V)
138 #undef V
139         // Use fixed token IDs for ASCII.
140         token_ = ch;
141         return;
142 
143       default:
144         if (IsIdentifierStart(ch)) {
145           ConsumeIdentifier(ch);
146         } else if (IsNumberStart(ch)) {
147           ConsumeNumber(ch);
148         } else {
149           // TODO(bradnelson): Support unicode (probably via UnicodeCache).
150           token_ = kParseError;
151         }
152         return;
153     }
154   }
155 }
156 
Rewind()157 void AsmJsScanner::Rewind() {
158   DCHECK_NE(kUninitialized, preceding_token_);
159   // TODO(bradnelson): Currently rewinding needs to leave in place the
160   // preceding newline state (in case a |0 ends a line).
161   // This is weird and stateful, fix me.
162   DCHECK(!rewind_);
163   next_token_ = token_;
164   next_position_ = position_;
165   token_ = preceding_token_;
166   position_ = preceding_position_;
167   preceding_token_ = kUninitialized;
168   preceding_position_ = 0;
169   rewind_ = true;
170   identifier_string_.clear();
171 }
172 
ResetLocals()173 void AsmJsScanner::ResetLocals() { local_names_.clear(); }
174 
175 #if DEBUG
176 // Only used for debugging.
Name(token_t token) const177 std::string AsmJsScanner::Name(token_t token) const {
178   if (token >= 32 && token < 127) {
179     return std::string(1, static_cast<char>(token));
180   }
181   for (auto& i : local_names_) {
182     if (i.second == token) {
183       return i.first;
184     }
185   }
186   for (auto& i : global_names_) {
187     if (i.second == token) {
188       return i.first;
189     }
190   }
191   for (auto& i : property_names_) {
192     if (i.second == token) {
193       return i.first;
194     }
195   }
196   switch (token) {
197 #define V(rawname, name) \
198   case kToken_##name:    \
199     return rawname;
200     LONG_SYMBOL_NAME_LIST(V)
201 #undef V
202 #define V(name, value, string_name) \
203   case name:                        \
204     return string_name;
205     SPECIAL_TOKEN_LIST(V)
206     default:
207       break;
208 #undef V
209   }
210   UNREACHABLE();
211 }
212 #endif
213 
Seek(size_t pos)214 void AsmJsScanner::Seek(size_t pos) {
215   stream_->Seek(pos);
216   preceding_token_ = kUninitialized;
217   token_ = kUninitialized;
218   next_token_ = kUninitialized;
219   preceding_position_ = 0;
220   position_ = 0;
221   next_position_ = 0;
222   rewind_ = false;
223   Next();
224 }
225 
ConsumeIdentifier(base::uc32 ch)226 void AsmJsScanner::ConsumeIdentifier(base::uc32 ch) {
227   // Consume characters while still part of the identifier.
228   identifier_string_.clear();
229   while (IsIdentifierPart(ch)) {
230     identifier_string_ += ch;
231     ch = stream_->Advance();
232   }
233   // Go back one for next time.
234   stream_->Back();
235 
236   // Decode what the identifier means.
237   if (preceding_token_ == '.') {
238     auto i = property_names_.find(identifier_string_);
239     if (i != property_names_.end()) {
240       token_ = i->second;
241       return;
242     }
243   } else {
244     {
245       auto i = local_names_.find(identifier_string_);
246       if (i != local_names_.end()) {
247         token_ = i->second;
248         return;
249       }
250     }
251     if (!in_local_scope_) {
252       auto i = global_names_.find(identifier_string_);
253       if (i != global_names_.end()) {
254         token_ = i->second;
255         return;
256       }
257     }
258   }
259   if (preceding_token_ == '.') {
260     CHECK_LT(global_count_, kMaxIdentifierCount);
261     token_ = kGlobalsStart + global_count_++;
262     property_names_[identifier_string_] = token_;
263   } else if (in_local_scope_) {
264     CHECK_LT(local_names_.size(), kMaxIdentifierCount);
265     token_ = kLocalsStart - static_cast<token_t>(local_names_.size());
266     local_names_[identifier_string_] = token_;
267   } else {
268     CHECK_LT(global_count_, kMaxIdentifierCount);
269     token_ = kGlobalsStart + global_count_++;
270     global_names_[identifier_string_] = token_;
271   }
272 }
273 
ConsumeNumber(base::uc32 ch)274 void AsmJsScanner::ConsumeNumber(base::uc32 ch) {
275   std::string number;
276   number.assign(1, ch);
277   bool has_dot = ch == '.';
278   bool has_prefix = false;
279   for (;;) {
280     ch = stream_->Advance();
281     if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
282         (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' ||
283         ch == 'x' ||
284         ((ch == '-' || ch == '+') && !has_prefix &&
285          (number[number.size() - 1] == 'e' ||
286           number[number.size() - 1] == 'E'))) {
287       // TODO(bradnelson): Test weird cases ending in -.
288       if (ch == '.') {
289         has_dot = true;
290       }
291       if (ch == 'b' || ch == 'o' || ch == 'x') {
292         has_prefix = true;
293       }
294       number.push_back(ch);
295     } else {
296       break;
297     }
298   }
299   stream_->Back();
300   // Special case the most common number.
301   if (number.size() == 1 && number[0] == '0') {
302     unsigned_value_ = 0;
303     token_ = kUnsigned;
304     return;
305   }
306   // Pick out dot.
307   if (number.size() == 1 && number[0] == '.') {
308     token_ = '.';
309     return;
310   }
311   // Decode numbers.
312   double_value_ = StringToDouble(
313       base::Vector<const uint8_t>::cast(base::VectorOf(number)),
314       ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL);
315   if (std::isnan(double_value_)) {
316     // Check if string to number conversion didn't consume all the characters.
317     // This happens if the character filter let through something invalid
318     // like: 0123ef for example.
319     // TODO(bradnelson): Check if this happens often enough to be a perf
320     // problem.
321     if (number[0] == '.') {
322       for (size_t k = 1; k < number.size(); ++k) {
323         stream_->Back();
324       }
325       token_ = '.';
326       return;
327     }
328     // Anything else that doesn't parse is an error.
329     token_ = kParseError;
330     return;
331   }
332   if (has_dot || trunc(double_value_) != double_value_) {
333     token_ = kDouble;
334   } else {
335     // Exceeding safe integer range is an error.
336     if (double_value_ > static_cast<double>(kMaxUInt32)) {
337       token_ = kParseError;
338       return;
339     }
340     unsigned_value_ = static_cast<uint32_t>(double_value_);
341     token_ = kUnsigned;
342   }
343 }
344 
ConsumeCComment()345 bool AsmJsScanner::ConsumeCComment() {
346   for (;;) {
347     base::uc32 ch = stream_->Advance();
348     while (ch == '*') {
349       ch = stream_->Advance();
350       if (ch == '/') {
351         return true;
352       }
353     }
354     if (ch == '\n') {
355       preceded_by_newline_ = true;
356     }
357     if (ch == kEndOfInputU) {
358       return false;
359     }
360   }
361 }
362 
ConsumeCPPComment()363 void AsmJsScanner::ConsumeCPPComment() {
364   for (;;) {
365     base::uc32 ch = stream_->Advance();
366     if (ch == '\n') {
367       preceded_by_newline_ = true;
368       return;
369     }
370     if (ch == kEndOfInputU) {
371       return;
372     }
373   }
374 }
375 
ConsumeString(base::uc32 quote)376 void AsmJsScanner::ConsumeString(base::uc32 quote) {
377   // Only string allowed is 'use asm' / "use asm".
378   const char* expected = "use asm";
379   for (; *expected != '\0'; ++expected) {
380     if (stream_->Advance() != static_cast<base::uc32>(*expected)) {
381       token_ = kParseError;
382       return;
383     }
384   }
385   if (stream_->Advance() != quote) {
386     token_ = kParseError;
387     return;
388   }
389   token_ = kToken_UseAsm;
390 }
391 
ConsumeCompareOrShift(base::uc32 ch)392 void AsmJsScanner::ConsumeCompareOrShift(base::uc32 ch) {
393   base::uc32 next_ch = stream_->Advance();
394   if (next_ch == '=') {
395     switch (ch) {
396       case '<':
397         token_ = kToken_LE;
398         break;
399       case '>':
400         token_ = kToken_GE;
401         break;
402       case '=':
403         token_ = kToken_EQ;
404         break;
405       case '!':
406         token_ = kToken_NE;
407         break;
408       default:
409         UNREACHABLE();
410     }
411   } else if (ch == '<' && next_ch == '<') {
412     token_ = kToken_SHL;
413   } else if (ch == '>' && next_ch == '>') {
414     if (stream_->Advance() == '>') {
415       token_ = kToken_SHR;
416     } else {
417       token_ = kToken_SAR;
418       stream_->Back();
419     }
420   } else {
421     stream_->Back();
422     token_ = ch;
423   }
424 }
425 
IsIdentifierStart(base::uc32 ch)426 bool AsmJsScanner::IsIdentifierStart(base::uc32 ch) {
427   return base::IsInRange(AsciiAlphaToLower(ch), 'a', 'z') || ch == '_' ||
428          ch == '$';
429 }
430 
IsIdentifierPart(base::uc32 ch)431 bool AsmJsScanner::IsIdentifierPart(base::uc32 ch) {
432   return IsAsciiIdentifier(ch);
433 }
434 
IsNumberStart(base::uc32 ch)435 bool AsmJsScanner::IsNumberStart(base::uc32 ch) {
436   return ch == '.' || IsDecimalDigit(ch);
437 }
438 
439 }  // namespace internal
440 }  // namespace v8
441