• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
4 // met:
5 //
6 //     * Redistributions of source code must retain the above copyright
7 //       notice, this list of conditions and the following disclaimer.
8 //     * Redistributions in binary form must reproduce the above
9 //       copyright notice, this list of conditions and the following
10 //       disclaimer in the documentation and/or other materials provided
11 //       with the distribution.
12 //     * Neither the name of Google Inc. nor the names of its
13 //       contributors may be used to endorse or promote products derived
14 //       from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 
32 #include "v8.h"
33 
34 #include "isolate.h"
35 #include "token.h"
36 #include "scanner.h"
37 #include "parser.h"
38 #include "utils.h"
39 #include "execution.h"
40 #include "preparser.h"
41 #include "cctest.h"
42 
43 namespace i = ::v8::internal;
44 
TEST(KeywordMatcher)45 TEST(KeywordMatcher) {
46   struct KeywordToken {
47     const char* keyword;
48     i::Token::Value token;
49   };
50 
51   static const KeywordToken keywords[] = {
52 #define KEYWORD(t, s, d) { s, i::Token::t },
53 #define IGNORE(t, s, d)  /* */
54       TOKEN_LIST(IGNORE, KEYWORD, IGNORE)
55 #undef KEYWORD
56       { NULL, i::Token::IDENTIFIER }
57   };
58 
59   static const char* future_keywords[] = {
60 #define FUTURE(t, s, d) s,
61       TOKEN_LIST(IGNORE, IGNORE, FUTURE)
62 #undef FUTURE
63 #undef IGNORE
64       NULL
65   };
66 
67   KeywordToken key_token;
68   for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) {
69     i::KeywordMatcher matcher;
70     const char* keyword = key_token.keyword;
71     int length = i::StrLength(keyword);
72     for (int j = 0; j < length; j++) {
73       if (key_token.token == i::Token::INSTANCEOF && j == 2) {
74         // "in" is a prefix of "instanceof". It's the only keyword
75         // that is a prefix of another.
76         CHECK_EQ(i::Token::IN, matcher.token());
77       } else {
78         CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
79       }
80       matcher.AddChar(keyword[j]);
81     }
82     CHECK_EQ(key_token.token, matcher.token());
83     // Adding more characters will make keyword matching fail.
84     matcher.AddChar('z');
85     CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
86     // Adding a keyword later will not make it match again.
87     matcher.AddChar('i');
88     matcher.AddChar('f');
89     CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
90   }
91 
92   // Future keywords are not recognized.
93   const char* future_keyword;
94   for (int i = 0; (future_keyword = future_keywords[i]) != NULL; i++) {
95     i::KeywordMatcher matcher;
96     int length = i::StrLength(future_keyword);
97     for (int j = 0; j < length; j++) {
98       matcher.AddChar(future_keyword[j]);
99     }
100     CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
101   }
102 
103   // Zero isn't ignored at first.
104   i::KeywordMatcher bad_start;
105   bad_start.AddChar(0);
106   CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
107   bad_start.AddChar('i');
108   bad_start.AddChar('f');
109   CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
110 
111   // Zero isn't ignored at end.
112   i::KeywordMatcher bad_end;
113   bad_end.AddChar('i');
114   bad_end.AddChar('f');
115   CHECK_EQ(i::Token::IF, bad_end.token());
116   bad_end.AddChar(0);
117   CHECK_EQ(i::Token::IDENTIFIER, bad_end.token());
118 
119   // Case isn't ignored.
120   i::KeywordMatcher bad_case;
121   bad_case.AddChar('i');
122   bad_case.AddChar('F');
123   CHECK_EQ(i::Token::IDENTIFIER, bad_case.token());
124 
125   // If we mark it as failure, continuing won't help.
126   i::KeywordMatcher full_stop;
127   full_stop.AddChar('i');
128   CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
129   full_stop.Fail();
130   CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
131   full_stop.AddChar('f');
132   CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
133 }
134 
135 
// Regression test for
//    http://code.google.com/p/chromium/issues/detail?id=53548
// "-->" must be treated as a comment running to the end of the line whenever
// only whitespace precedes it on that line, including when a multiline
// comment comes first. This used to fail when the "-->" appeared before the
// first real token of the input.
TEST(ScanHTMLEndComments) {
  v8::V8::Initialize();

  // NULL-terminated list of scripts that must all pre-compile cleanly.
  const char* tests[] = {
      // Before first real token.
      "--> is eol-comment\nvar y = 37;\n",
      "\n --> is eol-comment\nvar y = 37;\n",
      "/* precomment */ --> is eol-comment\nvar y = 37;\n",
      "\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
      // After first real token.
      "var x = 42;\n--> is eol-comment\nvar y = 37;\n",
      "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
      NULL
  };

  // Parser/Scanner needs a stack limit; place it 128KB below this frame.
  int marker;
  i::Isolate::Current()->stack_guard()->SetStackLimit(
      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);

  for (const char** cursor = tests; *cursor; ++cursor) {
    const char* script = *cursor;
    v8::ScriptData* data =
        v8::ScriptData::PreCompile(script, i::StrLength(script));
    CHECK(data != NULL && !data->HasError());
    delete data;
  }
}
169 
170 
171 class ScriptResource : public v8::String::ExternalAsciiStringResource {
172  public:
ScriptResource(const char * data,size_t length)173   ScriptResource(const char* data, size_t length)
174       : data_(data), length_(length) { }
175 
data() const176   const char* data() const { return data_; }
length() const177   size_t length() const { return length_; }
178 
179  private:
180   const char* data_;
181   size_t length_;
182 };
183 
184 
TEST(Preparsing)185 TEST(Preparsing) {
186   v8::HandleScope handles;
187   v8::Persistent<v8::Context> context = v8::Context::New();
188   v8::Context::Scope context_scope(context);
189   int marker;
190   i::Isolate::Current()->stack_guard()->SetStackLimit(
191       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
192 
193   // Source containing functions that might be lazily compiled  and all types
194   // of symbols (string, propertyName, regexp).
195   const char* source =
196       "var x = 42;"
197       "function foo(a) { return function nolazy(b) { return a + b; } }"
198       "function bar(a) { if (a) return function lazy(b) { return b; } }"
199       "var z = {'string': 'string literal', bareword: 'propertyName', "
200       "         42: 'number literal', for: 'keyword as propertyName', "
201       "         f\\u006fr: 'keyword propertyname with escape'};"
202       "var v = /RegExp Literal/;"
203       "var w = /RegExp Literal\\u0020With Escape/gin;"
204       "var y = { get getter() { return 42; }, "
205       "          set setter(v) { this.value = v; }};";
206   int source_length = i::StrLength(source);
207   const char* error_source = "var x = y z;";
208   int error_source_length = i::StrLength(error_source);
209 
210   v8::ScriptData* preparse =
211       v8::ScriptData::PreCompile(source, source_length);
212   CHECK(!preparse->HasError());
213   bool lazy_flag = i::FLAG_lazy;
214   {
215     i::FLAG_lazy = true;
216     ScriptResource* resource = new ScriptResource(source, source_length);
217     v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
218     v8::Script::Compile(script_source, NULL, preparse);
219   }
220 
221   {
222     i::FLAG_lazy = false;
223 
224     ScriptResource* resource = new ScriptResource(source, source_length);
225     v8::Local<v8::String> script_source = v8::String::NewExternal(resource);
226     v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>());
227   }
228   delete preparse;
229   i::FLAG_lazy = lazy_flag;
230 
231   // Syntax error.
232   v8::ScriptData* error_preparse =
233       v8::ScriptData::PreCompile(error_source, error_source_length);
234   CHECK(error_preparse->HasError());
235   i::ScriptDataImpl *pre_impl =
236       reinterpret_cast<i::ScriptDataImpl*>(error_preparse);
237   i::Scanner::Location error_location =
238       pre_impl->MessageLocation();
239   // Error is at "z" in source, location 10..11.
240   CHECK_EQ(10, error_location.beg_pos);
241   CHECK_EQ(11, error_location.end_pos);
242   // Should not crash.
243   const char* message = pre_impl->BuildMessage();
244   i::Vector<const char*> args = pre_impl->BuildArgs();
245   CHECK_GT(strlen(message), 0);
246 }
247 
248 
// Runs the stand-alone pre-parser directly over a handful of small programs
// and expects each one to pre-parse successfully with error-free data.
TEST(StandAlonePreParser) {
  v8::V8::Initialize();

  // Parser/Scanner needs a stack limit; place it 128KB below this frame.
  int marker;
  i::Isolate::Current()->stack_guard()->SetStackLimit(
      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);

  // NULL-terminated list of programs to pre-parse.
  const char* programs[] = {
      "{label: 42}",
      "var x = 42;",
      "function foo(x, y) { return x + y; }",
      "native function foo(); return %ArgleBargle(glop);",
      "var x = new new Function('this.x = 42');",
      NULL
  };

  uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();
  for (const char** cursor = programs; *cursor; ++cursor) {
    const char* program = *cursor;
    const unsigned program_length = static_cast<unsigned>(strlen(program));
    i::Utf8ToUC16CharacterStream stream(
        reinterpret_cast<const i::byte*>(program), program_length);
    i::CompleteParserRecorder log;
    i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
    scanner.Initialize(&stream);

    v8::preparser::PreParser::PreParseResult result =
        v8::preparser::PreParser::PreParseProgram(
            &scanner, &log, true, stack_limit);
    CHECK_EQ(v8::preparser::PreParser::kPreParseSuccess, result);

    // The recorded data must not carry an error either.
    i::ScriptDataImpl data(log.ExtractData());
    CHECK(!data.has_error());
  }
}
285 
286 
// Regression test: parsing fails because an identifier is expected after
// "function". Before the fix, *ok was not checked after
// Expect(Token::Identifier, ok) and the invalid currently scanned literal was
// then used. That always failed in debug mode and sometimes crashed in
// release mode. The pre-parse must now simply report an error.
TEST(RegressChromium62639) {
  v8::V8::Initialize();

  // Parser/Scanner needs a stack limit; place it 128KB below this frame.
  int marker;
  i::Isolate::Current()->stack_guard()->SetStackLimit(
      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);

  const char* program = "var x = 'something';\n"
                        "escape: function() {}";
  const unsigned program_length = static_cast<unsigned>(strlen(program));

  i::Utf8ToUC16CharacterStream stream(
      reinterpret_cast<const i::byte*>(program), program_length);
  i::ScriptDataImpl* data = i::ParserApi::PreParse(&stream, NULL);
  CHECK(data->HasError());
  delete data;
}
308 
309 
// Regression test: preparsing didn't consider the catch clause of a try
// statement as with-content, which made it assume that a function inside the
// block could be lazily compiled, and an extra, unexpected, entry was added
// to the data. The function inside the catch block must have no entry, while
// the function after it must have one.
TEST(Regress928) {
  v8::V8::Initialize();

  // Parser/Scanner needs a stack limit; place it 128KB below this frame.
  int marker;
  i::Isolate::Current()->stack_guard()->SetStackLimit(
      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);

  const char* program =
      "try { } catch (e) { var foo = function () { /* first */ } }"
      "var bar = function () { /* second */ }";
  const unsigned program_length = static_cast<unsigned>(strlen(program));

  i::Utf8ToUC16CharacterStream stream(
      reinterpret_cast<const i::byte*>(program), program_length);
  i::ScriptDataImpl* data = i::ParserApi::PartialPreParse(&stream, NULL);
  CHECK(!data->HasError());

  data->Initialize();

  // Distance from the start of "function" to the '{' opening its body.
  const int lbrace_offset = static_cast<int>(strlen("function () "));

  // First function: the one inside the catch block.
  const char* first_keyword = strstr(program, "function");
  const int first_function = static_cast<int>(first_keyword - program);
  const int first_lbrace = first_function + lbrace_offset;
  CHECK_EQ('{', program[first_lbrace]);
  i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace);
  CHECK(!entry1.is_valid());

  // Second function: searched for from the first function's body onwards.
  const char* second_keyword = strstr(program + first_lbrace, "function");
  const int second_function = static_cast<int>(second_keyword - program);
  const int second_lbrace = second_function + lbrace_offset;
  CHECK_EQ('{', program[second_lbrace]);
  i::FunctionEntry entry2 = data->GetFunctionEntry(second_lbrace);
  CHECK(entry2.is_valid());
  // The recorded end position must lie just past the closing brace.
  CHECK_EQ('}', program[entry2.end_pos() - 1]);
  delete data;
}
350 
351 
// Pre-parses a program consisting of one megabyte of '(' characters and
// expects the pre-parser to report a stack overflow rather than succeed or
// crash.
TEST(PreParseOverflow) {
  v8::V8::Initialize();

  // Parser/Scanner needs a stack limit; place it 128KB below this frame.
  int marker;
  i::Isolate::Current()->stack_guard()->SetStackLimit(
      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);

  const size_t kProgramSize = 1024 * 1024;
  // Allocate with new[] instead of malloc(): elsewhere in this file
  // (TestCharacterStream) i::SmartPointer is handed a new[] array, so a
  // malloc()ed buffer would be released by the wrong deallocator.
  // NOTE(review): assumes i::SmartPointer frees with delete[] - confirm.
  i::SmartPointer<char> program(new char[kProgramSize + 1]);
  memset(*program, '(', kProgramSize);
  program[kProgramSize] = '\0';

  uintptr_t stack_limit = i::Isolate::Current()->stack_guard()->real_climit();

  // The stream covers the kProgramSize parentheses, not the terminating NUL.
  i::Utf8ToUC16CharacterStream stream(
      reinterpret_cast<const i::byte*>(*program),
      static_cast<unsigned>(kProgramSize));
  i::CompleteParserRecorder log;
  i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
  scanner.Initialize(&stream);

  v8::preparser::PreParser::PreParseResult result =
      v8::preparser::PreParser::PreParseProgram(&scanner,
                                                &log,
                                                true,
                                                stack_limit);
  CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);
}
382 
383 
384 class TestExternalResource: public v8::String::ExternalStringResource {
385  public:
TestExternalResource(uint16_t * data,int length)386   explicit TestExternalResource(uint16_t* data, int length)
387       : data_(data), length_(static_cast<size_t>(length)) { }
388 
~TestExternalResource()389   ~TestExternalResource() { }
390 
data() const391   const uint16_t* data() const {
392     return data_;
393   }
394 
length() const395   size_t length() const {
396     return length_;
397   }
398  private:
399   uint16_t* data_;
400   size_t length_;
401 };
402 
403 
404 #define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
405 
TestCharacterStream(const char * ascii_source,unsigned length,unsigned start=0,unsigned end=0)406 void TestCharacterStream(const char* ascii_source,
407                          unsigned length,
408                          unsigned start = 0,
409                          unsigned end = 0) {
410   if (end == 0) end = length;
411   unsigned sub_length = end - start;
412   i::HandleScope test_scope;
413   i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
414   for (unsigned i = 0; i < length; i++) {
415     uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
416   }
417   i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
418   i::Handle<i::String> ascii_string(
419       FACTORY->NewStringFromAscii(ascii_vector));
420   TestExternalResource resource(*uc16_buffer, length);
421   i::Handle<i::String> uc16_string(
422       FACTORY->NewExternalStringFromTwoByte(&resource));
423 
424   i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
425       i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
426   i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
427   i::Utf8ToUC16CharacterStream utf8_stream(
428       reinterpret_cast<const i::byte*>(ascii_source), end);
429   utf8_stream.SeekForward(start);
430 
431   unsigned i = start;
432   while (i < end) {
433     // Read streams one char at a time
434     CHECK_EQU(i, uc16_stream.pos());
435     CHECK_EQU(i, string_stream.pos());
436     CHECK_EQU(i, utf8_stream.pos());
437     int32_t c0 = ascii_source[i];
438     int32_t c1 = uc16_stream.Advance();
439     int32_t c2 = string_stream.Advance();
440     int32_t c3 = utf8_stream.Advance();
441     i++;
442     CHECK_EQ(c0, c1);
443     CHECK_EQ(c0, c2);
444     CHECK_EQ(c0, c3);
445     CHECK_EQU(i, uc16_stream.pos());
446     CHECK_EQU(i, string_stream.pos());
447     CHECK_EQU(i, utf8_stream.pos());
448   }
449   while (i > start + sub_length / 4) {
450     // Pushback, re-read, pushback again.
451     int32_t c0 = ascii_source[i - 1];
452     CHECK_EQU(i, uc16_stream.pos());
453     CHECK_EQU(i, string_stream.pos());
454     CHECK_EQU(i, utf8_stream.pos());
455     uc16_stream.PushBack(c0);
456     string_stream.PushBack(c0);
457     utf8_stream.PushBack(c0);
458     i--;
459     CHECK_EQU(i, uc16_stream.pos());
460     CHECK_EQU(i, string_stream.pos());
461     CHECK_EQU(i, utf8_stream.pos());
462     int32_t c1 = uc16_stream.Advance();
463     int32_t c2 = string_stream.Advance();
464     int32_t c3 = utf8_stream.Advance();
465     i++;
466     CHECK_EQU(i, uc16_stream.pos());
467     CHECK_EQU(i, string_stream.pos());
468     CHECK_EQU(i, utf8_stream.pos());
469     CHECK_EQ(c0, c1);
470     CHECK_EQ(c0, c2);
471     CHECK_EQ(c0, c3);
472     uc16_stream.PushBack(c0);
473     string_stream.PushBack(c0);
474     utf8_stream.PushBack(c0);
475     i--;
476     CHECK_EQU(i, uc16_stream.pos());
477     CHECK_EQU(i, string_stream.pos());
478     CHECK_EQU(i, utf8_stream.pos());
479   }
480   unsigned halfway = start + sub_length / 2;
481   uc16_stream.SeekForward(halfway - i);
482   string_stream.SeekForward(halfway - i);
483   utf8_stream.SeekForward(halfway - i);
484   i = halfway;
485   CHECK_EQU(i, uc16_stream.pos());
486   CHECK_EQU(i, string_stream.pos());
487   CHECK_EQU(i, utf8_stream.pos());
488 
489   while (i < end) {
490     // Read streams one char at a time
491     CHECK_EQU(i, uc16_stream.pos());
492     CHECK_EQU(i, string_stream.pos());
493     CHECK_EQU(i, utf8_stream.pos());
494     int32_t c0 = ascii_source[i];
495     int32_t c1 = uc16_stream.Advance();
496     int32_t c2 = string_stream.Advance();
497     int32_t c3 = utf8_stream.Advance();
498     i++;
499     CHECK_EQ(c0, c1);
500     CHECK_EQ(c0, c2);
501     CHECK_EQ(c0, c3);
502     CHECK_EQU(i, uc16_stream.pos());
503     CHECK_EQU(i, string_stream.pos());
504     CHECK_EQU(i, utf8_stream.pos());
505   }
506 
507   int32_t c1 = uc16_stream.Advance();
508   int32_t c2 = string_stream.Advance();
509   int32_t c3 = utf8_stream.Advance();
510   CHECK_LT(c1, 0);
511   CHECK_LT(c2, 0);
512   CHECK_LT(c3, 0);
513 }
514 
515 
// Drives TestCharacterStream over a short string with control characters,
// a 4096-character buffer (whole and as a sub-range), a single NUL and the
// empty string.
TEST(CharacterStreams) {
  v8::HandleScope handles;
  v8::Persistent<v8::Context> context = v8::Context::New();
  v8::Context::Scope context_scope(context);

  TestCharacterStream("abc\0\n\r\x7f", 7);

  // Big buffer cycling through all 7-bit values.
  static const unsigned kBigStringSize = 4096;
  char buffer[kBigStringSize + 1];
  for (unsigned n = 0; n < kBigStringSize; n++) {
    buffer[n] = static_cast<char>(n & 0x7f);
  }
  TestCharacterStream(buffer, kBigStringSize);

  // Same buffer, restricted to the sub-range [576, 3298).
  TestCharacterStream(buffer, kBigStringSize, 576, 3298);

  // Degenerate inputs.
  TestCharacterStream("\0", 1);
  TestCharacterStream("", 0);
}
534 
535 
// Encodes every value from 0 to unibrow::Utf8::kMaxThreeByteChar as UTF-8
// into one buffer and checks that i::Utf8ToUC16CharacterStream reads the
// values back in order, supports pushing all of them back, and reports
// consistent positions when seeking forward in steps.
TEST(Utf8CharacterStream) {
  static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
  static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);

  // Buffer size: one byte per value up to kMaxOneByteChar, two bytes per
  // value up to kMaxTwoByteChar and three bytes per value beyond that.
  static const int kAllUtf8CharsSize =
      (unibrow::Utf8::kMaxOneByteChar + 1) +
      (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
      (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
  static const unsigned kAllUtf8CharsSizeU =
      static_cast<unsigned>(kAllUtf8CharsSize);

  char buffer[kAllUtf8CharsSizeU];
  unsigned cursor = 0;
  for (int code = 0; code <= kMaxUC16Char; code++) {
    cursor += unibrow::Utf8::Encode(buffer + cursor, code);
  }
  ASSERT(cursor == kAllUtf8CharsSizeU);

  i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
                                      kAllUtf8CharsSizeU);

  // Forward pass: each Advance() yields the next value and moves the
  // position by one.
  for (int code = 0; code <= kMaxUC16Char; code++) {
    CHECK_EQU(code, stream.pos());
    int32_t c = stream.Advance();
    CHECK_EQ(code, c);
    CHECK_EQU(code + 1, stream.pos());
  }

  // Backward pass: push every value back, last one first.
  for (int code = kMaxUC16Char; code >= 0; code--) {
    CHECK_EQU(code + 1, stream.pos());
    stream.PushBack(code);
    CHECK_EQU(code, stream.pos());
  }

  // Seek forward 12 positions at a time, reading one value after each seek.
  int position = 0;
  while (stream.pos() < kMaxUC16CharU) {
    CHECK_EQU(position, stream.pos());
    unsigned progress = stream.SeekForward(12);
    position += progress;
    int32_t c = stream.Advance();
    if (position > kMaxUC16Char) {
      // The seek ran past the last encoded value.
      CHECK_EQ(-1, c);
    } else {
      CHECK_EQ(position, c);
    }
    position += 1;
    CHECK_EQU(position, stream.pos());
  }
}

#undef CHECK_EQU
584 
TestStreamScanner(i::UC16CharacterStream * stream,i::Token::Value * expected_tokens,int skip_pos=0,int skip_to=0)585 void TestStreamScanner(i::UC16CharacterStream* stream,
586                        i::Token::Value* expected_tokens,
587                        int skip_pos = 0,  // Zero means not skipping.
588                        int skip_to = 0) {
589   i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
590   scanner.Initialize(stream);
591 
592   int i = 0;
593   do {
594     i::Token::Value expected = expected_tokens[i];
595     i::Token::Value actual = scanner.Next();
596     CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
597     if (scanner.location().end_pos == skip_pos) {
598       scanner.SeekForward(skip_to);
599     }
600     i++;
601   } while (expected_tokens[i] != i::Token::ILLEGAL);
602 }
603 
// Checks the token sequences the scanner produces for three inputs: a mix
// of punctuation, identifiers and comments; keywords with a skipped-over
// section; and a run of braces with a varying number of them skipped.
TEST(StreamScanner) {
  v8::V8::Initialize();

  // Case 1: plain scan, nothing skipped.
  {
    const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
    i::Utf8ToUC16CharacterStream stream1(
        reinterpret_cast<const i::byte*>(str1),
        static_cast<unsigned>(strlen(str1)));
    i::Token::Value expectations1[] = {
        i::Token::LBRACE,
        i::Token::IDENTIFIER,
        i::Token::IDENTIFIER,
        i::Token::FOR,
        i::Token::COLON,
        i::Token::MUL,
        i::Token::DIV,
        i::Token::LT,
        i::Token::SUB,
        i::Token::IDENTIFIER,
        i::Token::EOS,
        i::Token::ILLEGAL
    };
    TestStreamScanner(&stream1, expectations1, 0, 0);
  }

  // Case 2: after the '{' (which ends at position 20) seek forward by 37.
  {
    const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
    i::Utf8ToUC16CharacterStream stream2(
        reinterpret_cast<const i::byte*>(str2),
        static_cast<unsigned>(strlen(str2)));
    i::Token::Value expectations2[] = {
        i::Token::CASE,
        i::Token::DEFAULT,
        i::Token::CONST,
        i::Token::LBRACE,
        // Skipped part here
        i::Token::RBRACE,
        i::Token::DO,
        i::Token::EOS,
        i::Token::ILLEGAL
    };
    ASSERT_EQ('{', str2[19]);
    ASSERT_EQ('}', str2[37]);
    TestStreamScanner(&stream2, expectations2, 20, 37);
  }

  // Case 3: skip zero to four RBRACEs after the LBRACE. Each round moves the
  // EOS/ILLEGAL terminator pair one slot earlier in the expectations.
  {
    const char* str3 = "{}}}}";
    i::Token::Value expectations3[] = {
        i::Token::LBRACE,
        i::Token::RBRACE,
        i::Token::RBRACE,
        i::Token::RBRACE,
        i::Token::RBRACE,
        i::Token::EOS,
        i::Token::ILLEGAL
    };
    for (int skipped = 0; skipped <= 4; skipped++) {
      expectations3[6 - skipped] = i::Token::ILLEGAL;
      expectations3[5 - skipped] = i::Token::EOS;
      i::Utf8ToUC16CharacterStream stream3(
          reinterpret_cast<const i::byte*>(str3),
          static_cast<unsigned>(strlen(str3)));
      TestStreamScanner(&stream3, expectations3, 1, 1 + skipped);
    }
  }
}
664 
665 
TestScanRegExp(const char * re_source,const char * expected)666 void TestScanRegExp(const char* re_source, const char* expected) {
667   i::Utf8ToUC16CharacterStream stream(
668        reinterpret_cast<const i::byte*>(re_source),
669        static_cast<unsigned>(strlen(re_source)));
670   i::V8JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
671   scanner.Initialize(&stream);
672 
673   i::Token::Value start = scanner.peek();
674   CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
675   CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
676   scanner.Next();  // Current token is now the regexp literal.
677   CHECK(scanner.is_literal_ascii());
678   i::Vector<const char> actual = scanner.literal_ascii_string();
679   for (int i = 0; i < actual.length(); i++) {
680     CHECK_NE('\0', expected[i]);
681     CHECK_EQ(expected[i], actual[i]);
682   }
683 }
684 
685 
// Table-driven check of regular expression literal scanning. Every source
// has garbage after the terminating slash (except the "/=" cases); the
// scanner must stop at that slash and produce the listed pattern.
TEST(RegExpScanning) {
  v8::V8::Initialize();

  struct RegExpCase {
    const char* source;   // Text handed to the scanner.
    const char* pattern;  // Pattern expected to be scanned.
  };

  // Terminated by an entry whose source is NULL.
  static const RegExpCase cases[] = {
      // RegExp token with added garbage at the end. The scanner should only
      // scan the RegExp until the terminating slash just before
      // "flipperwald".
      { "/b/flipperwald", "b" },
      // Incomplete escape sequences don't hide the terminating slash.
      { "/\\x/flipperwald", "\\x" },
      { "/\\u/flipperwald", "\\u" },
      { "/\\u1/flipperwald", "\\u1" },
      { "/\\u12/flipperwald", "\\u12" },
      { "/\\u123/flipperwald", "\\u123" },
      { "/\\c/flipperwald", "\\c" },
      { "/\\c//flipperwald", "\\c" },
      // Slashes inside character classes are not terminating.
      { "/[/]/flipperwald", "[/]" },
      { "/[\\s-/]/flipperwald", "[\\s-/]" },
      // Incomplete escape sequences inside a character class don't hide
      // the end of the character class.
      { "/[\\c/]/flipperwald", "[\\c/]" },
      { "/[\\c]/flipperwald", "[\\c]" },
      { "/[\\x]/flipperwald", "[\\x]" },
      { "/[\\x1]/flipperwald", "[\\x1]" },
      { "/[\\u]/flipperwald", "[\\u]" },
      { "/[\\u1]/flipperwald", "[\\u1]" },
      { "/[\\u12]/flipperwald", "[\\u12]" },
      { "/[\\u123]/flipperwald", "[\\u123]" },
      // Escaped ']'s won't end the character class.
      { "/[\\]/]/flipperwald", "[\\]/]" },
      // Escaped slashes are not terminating.
      { "/\\//flipperwald", "\\/" },
      // Starting with '=' works too.
      { "/=/", "=" },
      { "/=?/", "=?" },
      { NULL, NULL }
  };

  for (const RegExpCase* entry = cases; entry->source != NULL; ++entry) {
    TestScanRegExp(entry->source, entry->pattern);
  }
}
721